@@ -55,10 +55,16 @@ function crossover_kkt_acceptable(
5555 return true
5656end
5757
58- """ True if coordinate `j` is on a finite box bound (within `atol`)."""
59- function _crossover_at_box_bound (xj, lj, uj; atol:: Real = 1.0e-12 )
60- return (isfinite (lj) && abs (xj - lj) <= atol) ||
61- (isfinite (uj) && abs (xj - uj) <= atol)
58+ """ GPU-safe mask: coordinate `j` is on a finite box bound (within `atol`)."""
59+ function _crossover_at_box_mask (
60+ x:: AbstractVector ,
61+ lv:: AbstractVector ,
62+ uv:: AbstractVector ;
63+ atol:: Real = 1.0e-12 ,
64+ )
65+ at_l = isfinite .(lv) .& (abs .(x .- lv) .<= atol)
66+ at_u = isfinite .(uv) .& (abs .(x .- uv) .<= atol)
67+ return at_l .| at_u
6268end
6369
6470"""
@@ -78,45 +84,257 @@ function crossover_effective_bounds(
7884 ) where {T}
7985 lv_eff = copy (milp. lv)
8086 uv_eff = copy (milp. uv)
81- m, n = size (milp. A)
82- at_box = [
83- _crossover_at_box_bound (x[j], milp. lv[j], milp. uv[j]; atol = bound_atol) for j in 1 : n
84- ]
87+ at_box = _crossover_at_box_mask (x, milp. lv, milp. uv; atol = bound_atol)
88+ _crossover_effective_bounds! (
89+ lv_eff,
90+ uv_eff,
91+ milp. A,
92+ milp. lc,
93+ milp. uc,
94+ x,
95+ at_box;
96+ eq_atol,
97+ )
98+ return lv_eff, uv_eff
99+ end
100+
101+ function _crossover_effective_bounds! (
102+ lv_eff,
103+ uv_eff,
104+ A:: SparseMatrixCSC{T} ,
105+ lc,
106+ uc,
107+ x,
108+ at_box;
109+ eq_atol:: Real = 1.0e-12 ,
110+ ) where {T}
111+ m, n = size (A)
112+ at_box_cpu = Vector (at_box)
113+ x_cpu = Vector (x)
114+ lv_cpu = Vector (lv_eff)
115+ uv_cpu = Vector (uv_eff)
116+ lc_cpu = Vector (lc)
117+ uc_cpu = Vector (uc)
85118 for i in 1 : m
86- isapprox (milp . lc [i], milp . uc [i]; atol = eq_atol) || continue
119+ isapprox (lc_cpu [i], uc_cpu [i]; atol = eq_atol) || continue
87120 free = Int[]
88- slack = milp . lc [i]
121+ slack = lc_cpu [i]
89122 @inbounds for j in 1 : n
90- aij = milp . A[i, j]
123+ aij = A[i, j]
91124 aij == 0 && continue
92- if at_box [j]
93- slack -= aij * x [j]
125+ if at_box_cpu [j]
126+ slack -= aij * x_cpu [j]
94127 else
95128 push! (free, j)
96129 end
97130 end
98131 length (free) == 1 || continue
99132 j = only (free)
100- aij = milp . A[i, j]
133+ aij = A[i, j]
101134 if aij > 0
102135 implied = slack / aij
103- if ! isfinite (uv_eff [j])
104- uv_eff [j] = implied
136+ if ! isfinite (uv_cpu [j])
137+ uv_cpu [j] = implied
105138 else
106- uv_eff [j] = min (uv_eff [j], implied)
139+ uv_cpu [j] = min (uv_cpu [j], implied)
107140 end
108141 elseif aij < 0
109142 implied = slack / aij
110- if ! isfinite (lv_eff [j])
111- lv_eff [j] = implied
143+ if ! isfinite (lv_cpu [j])
144+ lv_cpu [j] = implied
112145 else
113- lv_eff[j] = max (lv_eff[j], implied)
146+ lv_cpu[j] = max (lv_cpu[j], implied)
147+ end
148+ end
149+ end
150+ lv_eff .= lv_cpu
151+ uv_eff .= uv_cpu
152+ return lv_eff, uv_eff
153+ end
154+
155+ @kernel function crossover_effective_bounds_csr! (
156+ lv_eff:: DenseVector{T} ,
157+ uv_eff:: DenseVector{T} ,
158+ A_rowptr:: DenseVector{Ti} ,
159+ A_colval:: DenseVector{Ti} ,
160+ A_nzval:: DenseVector{T} ,
161+ lc:: DenseVector{T} ,
162+ uc:: DenseVector{T} ,
163+ x:: DenseVector{T} ,
164+ at_box:: DenseVector{Bool} ,
165+ eq_atol:: T ,
166+ ) where {T, Ti}
167+ i = @index (Global, Linear)
168+ if abs (lc[i] - uc[i]) <= eq_atol
169+ slack = lc[i]
170+ free_j = zero (Ti)
171+ free_aij = zero (T)
172+ n_free = zero (Ti)
173+ for k in A_rowptr[i]: (A_rowptr[i + Ti (1 )] - Ti (1 ))
174+ j = A_colval[k]
175+ aij = A_nzval[k]
176+ if at_box[j]
177+ slack -= aij * x[j]
178+ else
179+ n_free += Ti (1 )
180+ if n_free == Ti (1 )
181+ free_j = j
182+ free_aij = aij
183+ end
184+ end
185+ end
186+ if n_free == Ti (1 )
187+ j = free_j
188+ aij = free_aij
189+ if aij > zero (T)
190+ implied = slack / aij
191+ old = Atomix. @atomic uv_eff[j]
192+ while implied < old
193+ (old2, success) = Atomix. @atomicreplace uv_eff[j] old => implied
194+ success && break
195+ old = old2
196+ end
197+ elseif aij < zero (T)
198+ implied = slack / aij
199+ old = Atomix. @atomic lv_eff[j]
200+ while implied > old
201+ (old2, success) = Atomix. @atomicreplace lv_eff[j] old => implied
202+ success && break
203+ old = old2
204+ end
114205 end
115206 end
116207 end
208+ end
209+
210+ function _crossover_effective_bounds! (
211+ lv_eff,
212+ uv_eff,
213+ A:: GPUSparseMatrixCSR ,
214+ lc,
215+ uc,
216+ x,
217+ at_box;
218+ eq_atol:: Real = 1.0e-12 ,
219+ )
220+ backend = common_backend (lv_eff, uv_eff, A, lc, uc, x, at_box)
221+ eq_tol = eltype (x)(eq_atol)
222+ kernel! = crossover_effective_bounds_csr! (backend)
223+ kernel! (
224+ lv_eff,
225+ uv_eff,
226+ A. rowptr,
227+ A. colval,
228+ A. nzval,
229+ lc,
230+ uc,
231+ x,
232+ at_box,
233+ eq_tol;
234+ ndrange = size (A, 1 ),
235+ )
117236 return lv_eff, uv_eff
118237end
119238
239+ @kernel function crossover_effective_bounds_ell! (
240+ lv_eff:: DenseVector{T} ,
241+ uv_eff:: DenseVector{T} ,
242+ A_colval:: AbstractMatrix{Ti} ,
243+ A_nzval:: AbstractMatrix{T} ,
244+ lc:: DenseVector{T} ,
245+ uc:: DenseVector{T} ,
246+ x:: DenseVector{T} ,
247+ at_box:: DenseVector{Bool} ,
248+ eq_atol:: T ,
249+ ) where {T, Ti}
250+ i = @index (Global, Linear)
251+ if abs (lc[i] - uc[i]) <= eq_atol
252+ slack = lc[i]
253+ free_j = zero (Ti)
254+ free_aij = zero (T)
255+ n_free = zero (Ti)
256+ for k in axes (A_colval, 2 )
257+ j = A_colval[i, k]
258+ if j != zero (Ti)
259+ aij = A_nzval[i, k]
260+ if at_box[j]
261+ slack -= aij * x[j]
262+ else
263+ n_free += Ti (1 )
264+ if n_free == Ti (1 )
265+ free_j = j
266+ free_aij = aij
267+ end
268+ end
269+ end
270+ end
271+ if n_free == Ti (1 )
272+ j = free_j
273+ aij = free_aij
274+ if aij > zero (T)
275+ implied = slack / aij
276+ old = Atomix. @atomic uv_eff[j]
277+ while implied < old
278+ (old2, success) = Atomix. @atomicreplace uv_eff[j] old => implied
279+ success && break
280+ old = old2
281+ end
282+ elseif aij < zero (T)
283+ implied = slack / aij
284+ old = Atomix. @atomic lv_eff[j]
285+ while implied > old
286+ (old2, success) = Atomix. @atomicreplace lv_eff[j] old => implied
287+ success && break
288+ old = old2
289+ end
290+ end
291+ end
292+ end
293+ end
294+
295+ function _crossover_effective_bounds! (
296+ lv_eff,
297+ uv_eff,
298+ A:: GPUSparseMatrixELL ,
299+ lc,
300+ uc,
301+ x,
302+ at_box;
303+ eq_atol:: Real = 1.0e-12 ,
304+ )
305+ backend = common_backend (lv_eff, uv_eff, A, lc, uc, x, at_box)
306+ eq_tol = eltype (x)(eq_atol)
307+ kernel! = crossover_effective_bounds_ell! (backend)
308+ kernel! (
309+ lv_eff,
310+ uv_eff,
311+ A. colval,
312+ A. nzval,
313+ lc,
314+ uc,
315+ x,
316+ at_box,
317+ eq_tol;
318+ ndrange = size (A, 1 ),
319+ )
320+ return lv_eff, uv_eff
321+ end
322+
323+ function _crossover_effective_bounds! (
324+ lv_eff,
325+ uv_eff,
326+ A:: GPUSparseMatrixCOO ,
327+ lc,
328+ uc,
329+ x,
330+ at_box;
331+ eq_atol:: Real = 1.0e-12 ,
332+ )
333+ backend = get_backend (x)
334+ A_csr = adapt (backend, GPUSparseMatrixCSR (SparseMatrixCSC (A)))
335+ return _crossover_effective_bounds! (lv_eff, uv_eff, A_csr, lc, uc, x, at_box; eq_atol)
336+ end
337+
120338"""
121339 crossover_threshold!(x, lv, uv, params::CrossoverParameters)
122340
167385
168386""" Count coordinates changed by crossover (exact compare)."""
169387function crossover_n_changed (x_after, x_before)
170- return count (i -> x_after[i] != x_before[i], eachindex (x_before) )
388+ return sum ( x_after . != x_before)
171389end
172390
173391"""
0 commit comments