@@ -152,19 +152,56 @@ function _crossover_effective_bounds!(
152152 return lv_eff, uv_eff
153153end
154154
155- @kernel function crossover_effective_bounds_csr! (
156- lv_eff:: DenseVector{T} ,
157- uv_eff:: DenseVector{T} ,
"""
    _crossover_merge_row_bounds!(lv_eff, uv_eff, row_active, row_upper, row_j, row_implied)

Fold per-row implied bounds into the effective bound vectors `lv_eff` and `uv_eff`.

For every index `i` where `row_active[i]` is `true`, the value `row_implied[i]` is
merged into entry `j = row_j[i]`:

- if `row_upper[i]` is `true`, `uv_eff[j]` becomes `min(uv_eff[j], row_implied[i])`;
- otherwise `lv_eff[j]` becomes `max(lv_eff[j], row_implied[i])`.

A current bound that is not finite is replaced by the implied value outright.

The merge runs sequentially on `Vector` copies of the two bound vectors, so several
rows may target the same `j` without any atomics; the results are then copied back
into `lv_eff` and `uv_eff` with `copyto!`.

# Returns
The tuple `(lv_eff, uv_eff)`, both updated in place.

# Throws
- `DimensionMismatch` if `row_active`, `row_upper`, `row_j` and `row_implied` do not
  share the same indices.
- `BoundsError` if an active row names a column index outside the bound vectors.
"""
function _crossover_merge_row_bounds!(
    lv_eff,
    uv_eff,
    row_active,
    row_upper,
    row_j,
    row_implied,
)
    # Nothing to merge: skip copying both bound vectors out and back.
    any(row_active) || return lv_eff, uv_eff

    lv_cpu = Vector(lv_eff)
    uv_cpu = Vector(uv_eff)
    # The multi-argument `eachindex` verifies that all per-row arrays line up.
    for i in eachindex(row_active, row_upper, row_j, row_implied)
        row_active[i] || continue
        # `j` is data, not a loop index, so the accesses below stay bounds-checked.
        j = row_j[i]
        implied = row_implied[i]
        if row_upper[i]
            current = uv_cpu[j]
            uv_cpu[j] = isfinite(current) ? min(current, implied) : implied
        else
            current = lv_cpu[j]
            lv_cpu[j] = isfinite(current) ? max(current, implied) : implied
        end
    end
    # `copyto!` accepts the `Vector` source directly; no intermediate array needed.
    copyto!(lv_eff, lv_cpu)
    copyto!(uv_eff, uv_cpu)
    return lv_eff, uv_eff
end
188+
189+ @kernel function crossover_effective_bounds_csr_scan! (
190+ row_active:: DenseVector{UInt8} ,
191+ row_upper:: DenseVector{UInt8} ,
192+ row_j:: DenseVector{Ti} ,
193+ row_implied:: DenseVector{T} ,
158194 A_rowptr:: DenseVector{Ti} ,
159195 A_colval:: DenseVector{Ti} ,
160196 A_nzval:: DenseVector{T} ,
161197 lc:: DenseVector{T} ,
162198 uc:: DenseVector{T} ,
163199 x:: DenseVector{T} ,
164- at_box:: DenseVector{Bool } ,
200+ at_box:: DenseVector{UInt8 } ,
165201 eq_atol:: T ,
166202 ) where {T, Ti}
167203 i = @index (Global, Linear)
204+ row_active[i] = zero (UInt8)
168205 if abs (lc[i] - uc[i]) <= eq_atol
169206 slack = lc[i]
170207 free_j = zero (Ti)
173210 for k in A_rowptr[i]: (A_rowptr[i + Ti (1 )] - Ti (1 ))
174211 j = A_colval[k]
175212 aij = A_nzval[k]
176- if at_box[j]
213+ if at_box[j] != zero (UInt8)
177214 slack -= aij * x[j]
178215 else
179216 n_free += Ti (1 )
@@ -184,24 +221,12 @@ end
184221 end
185222 end
186223 if n_free == Ti (1 )
187- j = free_j
188224 aij = free_aij
189- if aij > zero (T)
190- implied = slack / aij
191- old = Atomix. @atomic uv_eff[j]
192- while implied < old
193- (old2, success) = Atomix. @atomicreplace uv_eff[j] old => implied
194- success && break
195- old = old2
196- end
197- elseif aij < zero (T)
198- implied = slack / aij
199- old = Atomix. @atomic lv_eff[j]
200- while implied > old
201- (old2, success) = Atomix. @atomicreplace lv_eff[j] old => implied
202- success && break
203- old = old2
204- end
225+ if aij != zero (T)
226+ row_active[i] = one (UInt8)
227+ row_j[i] = free_j
228+ row_implied[i] = slack / aij
229+ row_upper[i] = aij > zero (T) ? one (UInt8) : zero (UInt8)
205230 end
206231 end
207232 end
@@ -217,37 +242,57 @@ function _crossover_effective_bounds!(
217242 at_box;
218243 eq_atol:: Real = 1.0e-12 ,
219244 )
220- backend = common_backend (lv_eff, uv_eff, A, lc, uc, x, at_box)
221- eq_tol = eltype (x)(eq_atol)
222- kernel! = crossover_effective_bounds_csr! (backend)
245+ T = eltype (x)
246+ Ti = eltype (A. rowptr)
247+ m = size (A, 1 )
248+ backend = common_backend (A, lc, uc, x, at_box)
249+ eq_tol = T (eq_atol)
250+ at_box_u8 = adapt (backend, UInt8 .(at_box))
251+ row_active = allocate (backend, UInt8, m)
252+ row_upper = allocate (backend, UInt8, m)
253+ row_j = allocate (backend, Ti, m)
254+ row_implied = allocate (backend, T, m)
255+ kernel! = crossover_effective_bounds_csr_scan! (backend)
223256 kernel! (
224- lv_eff,
225- uv_eff,
257+ row_active,
258+ row_upper,
259+ row_j,
260+ row_implied,
226261 A. rowptr,
227262 A. colval,
228263 A. nzval,
229264 lc,
230265 uc,
231266 x,
232- at_box ,
267+ at_box_u8 ,
233268 eq_tol;
234- ndrange = size (A, 1 ),
269+ ndrange = m,
270+ )
271+ return _crossover_merge_row_bounds! (
272+ lv_eff,
273+ uv_eff,
274+ Vector (row_active) .!= 0 ,
275+ Vector (row_upper) .!= 0 ,
276+ Vector (row_j),
277+ Vector (row_implied),
235278 )
236- return lv_eff, uv_eff
237279end
238280
239- @kernel function crossover_effective_bounds_ell! (
240- lv_eff:: DenseVector{T} ,
241- uv_eff:: DenseVector{T} ,
281+ @kernel function crossover_effective_bounds_ell_scan! (
282+ row_active:: DenseVector{UInt8} ,
283+ row_upper:: DenseVector{UInt8} ,
284+ row_j:: DenseVector{Ti} ,
285+ row_implied:: DenseVector{T} ,
242286 A_colval:: AbstractMatrix{Ti} ,
243287 A_nzval:: AbstractMatrix{T} ,
244288 lc:: DenseVector{T} ,
245289 uc:: DenseVector{T} ,
246290 x:: DenseVector{T} ,
247- at_box:: DenseVector{Bool } ,
291+ at_box:: DenseVector{UInt8 } ,
248292 eq_atol:: T ,
249293 ) where {T, Ti}
250294 i = @index (Global, Linear)
295+ row_active[i] = zero (UInt8)
251296 if abs (lc[i] - uc[i]) <= eq_atol
252297 slack = lc[i]
253298 free_j = zero (Ti)
257302 j = A_colval[i, k]
258303 if j != zero (Ti)
259304 aij = A_nzval[i, k]
260- if at_box[j]
305+ if at_box[j] != zero (UInt8)
261306 slack -= aij * x[j]
262307 else
263308 n_free += Ti (1 )
@@ -269,24 +314,12 @@ end
269314 end
270315 end
271316 if n_free == Ti (1 )
272- j = free_j
273317 aij = free_aij
274- if aij > zero (T)
275- implied = slack / aij
276- old = Atomix. @atomic uv_eff[j]
277- while implied < old
278- (old2, success) = Atomix. @atomicreplace uv_eff[j] old => implied
279- success && break
280- old = old2
281- end
282- elseif aij < zero (T)
283- implied = slack / aij
284- old = Atomix. @atomic lv_eff[j]
285- while implied > old
286- (old2, success) = Atomix. @atomicreplace lv_eff[j] old => implied
287- success && break
288- old = old2
289- end
318+ if aij != zero (T)
319+ row_active[i] = one (UInt8)
320+ row_j[i] = free_j
321+ row_implied[i] = slack / aij
322+ row_upper[i] = aij > zero (T) ? one (UInt8) : zero (UInt8)
290323 end
291324 end
292325 end
@@ -302,22 +335,39 @@ function _crossover_effective_bounds!(
302335 at_box;
303336 eq_atol:: Real = 1.0e-12 ,
304337 )
305- backend = common_backend (lv_eff, uv_eff, A, lc, uc, x, at_box)
306- eq_tol = eltype (x)(eq_atol)
307- kernel! = crossover_effective_bounds_ell! (backend)
338+ T = eltype (x)
339+ Ti = eltype (A. colval)
340+ m = size (A, 1 )
341+ backend = common_backend (A, lc, uc, x, at_box)
342+ eq_tol = T (eq_atol)
343+ at_box_u8 = adapt (backend, UInt8 .(at_box))
344+ row_active = allocate (backend, UInt8, m)
345+ row_upper = allocate (backend, UInt8, m)
346+ row_j = allocate (backend, Ti, m)
347+ row_implied = allocate (backend, T, m)
348+ kernel! = crossover_effective_bounds_ell_scan! (backend)
308349 kernel! (
309- lv_eff,
310- uv_eff,
350+ row_active,
351+ row_upper,
352+ row_j,
353+ row_implied,
311354 A. colval,
312355 A. nzval,
313356 lc,
314357 uc,
315358 x,
316- at_box ,
359+ at_box_u8 ,
317360 eq_tol;
318- ndrange = size (A, 1 ),
361+ ndrange = m,
362+ )
363+ return _crossover_merge_row_bounds! (
364+ lv_eff,
365+ uv_eff,
366+ Vector (row_active) .!= 0 ,
367+ Vector (row_upper) .!= 0 ,
368+ Vector (row_j),
369+ Vector (row_implied),
319370 )
320- return lv_eff, uv_eff
321371end
322372
323373function _crossover_effective_bounds! (
0 commit comments