Skip to content

Commit ead0983

Browse files
author
Charles Vielzeuf
committed
Fix GPU crossover with CSR kernels and broadcast-safe helpers.
Run effective bounds on device via row kernels (CSR/ELL) with atomic bound merging, keep CSC on CPU, and vectorize crossover_n_changed so docs and CI GPU tutorial examples no longer hit scalar indexing errors.
1 parent 6dee8aa commit ead0983

2 files changed

Lines changed: 260 additions & 22 deletions

File tree

src/components/crossover.jl

Lines changed: 239 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,16 @@ function crossover_kkt_acceptable(
5555
return true
5656
end
5757

58-
"""True if coordinate `j` is on a finite box bound (within `atol`)."""
59-
function _crossover_at_box_bound(xj, lj, uj; atol::Real = 1.0e-12)
60-
return (isfinite(lj) && abs(xj - lj) <= atol) ||
61-
(isfinite(uj) && abs(xj - uj) <= atol)
58+
"""GPU-safe mask: coordinate `j` is on a finite box bound (within `atol`)."""
59+
function _crossover_at_box_mask(
60+
x::AbstractVector,
61+
lv::AbstractVector,
62+
uv::AbstractVector;
63+
atol::Real = 1.0e-12,
64+
)
65+
at_l = isfinite.(lv) .& (abs.(x .- lv) .<= atol)
66+
at_u = isfinite.(uv) .& (abs.(x .- uv) .<= atol)
67+
return at_l .| at_u
6268
end
6369

6470
"""
@@ -78,45 +84,257 @@ function crossover_effective_bounds(
7884
) where {T}
7985
lv_eff = copy(milp.lv)
8086
uv_eff = copy(milp.uv)
81-
m, n = size(milp.A)
82-
at_box = [
83-
_crossover_at_box_bound(x[j], milp.lv[j], milp.uv[j]; atol = bound_atol) for j in 1:n
84-
]
87+
at_box = _crossover_at_box_mask(x, milp.lv, milp.uv; atol = bound_atol)
88+
_crossover_effective_bounds!(
89+
lv_eff,
90+
uv_eff,
91+
milp.A,
92+
milp.lc,
93+
milp.uc,
94+
x,
95+
at_box;
96+
eq_atol,
97+
)
98+
return lv_eff, uv_eff
99+
end
100+
101+
function _crossover_effective_bounds!(
102+
lv_eff,
103+
uv_eff,
104+
A::SparseMatrixCSC{T},
105+
lc,
106+
uc,
107+
x,
108+
at_box;
109+
eq_atol::Real = 1.0e-12,
110+
) where {T}
111+
m, n = size(A)
112+
at_box_cpu = Vector(at_box)
113+
x_cpu = Vector(x)
114+
lv_cpu = Vector(lv_eff)
115+
uv_cpu = Vector(uv_eff)
116+
lc_cpu = Vector(lc)
117+
uc_cpu = Vector(uc)
85118
for i in 1:m
86-
isapprox(milp.lc[i], milp.uc[i]; atol = eq_atol) || continue
119+
isapprox(lc_cpu[i], uc_cpu[i]; atol = eq_atol) || continue
87120
free = Int[]
88-
slack = milp.lc[i]
121+
slack = lc_cpu[i]
89122
@inbounds for j in 1:n
90-
aij = milp.A[i, j]
123+
aij = A[i, j]
91124
aij == 0 && continue
92-
if at_box[j]
93-
slack -= aij * x[j]
125+
if at_box_cpu[j]
126+
slack -= aij * x_cpu[j]
94127
else
95128
push!(free, j)
96129
end
97130
end
98131
length(free) == 1 || continue
99132
j = only(free)
100-
aij = milp.A[i, j]
133+
aij = A[i, j]
101134
if aij > 0
102135
implied = slack / aij
103-
if !isfinite(uv_eff[j])
104-
uv_eff[j] = implied
136+
if !isfinite(uv_cpu[j])
137+
uv_cpu[j] = implied
105138
else
106-
uv_eff[j] = min(uv_eff[j], implied)
139+
uv_cpu[j] = min(uv_cpu[j], implied)
107140
end
108141
elseif aij < 0
109142
implied = slack / aij
110-
if !isfinite(lv_eff[j])
111-
lv_eff[j] = implied
143+
if !isfinite(lv_cpu[j])
144+
lv_cpu[j] = implied
112145
else
113-
lv_eff[j] = max(lv_eff[j], implied)
146+
lv_cpu[j] = max(lv_cpu[j], implied)
147+
end
148+
end
149+
end
150+
lv_eff .= lv_cpu
151+
uv_eff .= uv_cpu
152+
return lv_eff, uv_eff
153+
end
154+
155+
@kernel function crossover_effective_bounds_csr!(
156+
lv_eff::DenseVector{T},
157+
uv_eff::DenseVector{T},
158+
A_rowptr::DenseVector{Ti},
159+
A_colval::DenseVector{Ti},
160+
A_nzval::DenseVector{T},
161+
lc::DenseVector{T},
162+
uc::DenseVector{T},
163+
x::DenseVector{T},
164+
at_box::DenseVector{Bool},
165+
eq_atol::T,
166+
) where {T, Ti}
167+
i = @index(Global, Linear)
168+
if abs(lc[i] - uc[i]) <= eq_atol
169+
slack = lc[i]
170+
free_j = zero(Ti)
171+
free_aij = zero(T)
172+
n_free = zero(Ti)
173+
for k in A_rowptr[i]:(A_rowptr[i + Ti(1)] - Ti(1))
174+
j = A_colval[k]
175+
aij = A_nzval[k]
176+
if at_box[j]
177+
slack -= aij * x[j]
178+
else
179+
n_free += Ti(1)
180+
if n_free == Ti(1)
181+
free_j = j
182+
free_aij = aij
183+
end
184+
end
185+
end
186+
if n_free == Ti(1)
187+
j = free_j
188+
aij = free_aij
189+
if aij > zero(T)
190+
implied = slack / aij
191+
old = Atomix.@atomic uv_eff[j]
192+
while implied < old
193+
(old2, success) = Atomix.@atomicreplace uv_eff[j] old => implied
194+
success && break
195+
old = old2
196+
end
197+
elseif aij < zero(T)
198+
implied = slack / aij
199+
old = Atomix.@atomic lv_eff[j]
200+
while implied > old
201+
(old2, success) = Atomix.@atomicreplace lv_eff[j] old => implied
202+
success && break
203+
old = old2
204+
end
114205
end
115206
end
116207
end
208+
end
209+
210+
function _crossover_effective_bounds!(
211+
lv_eff,
212+
uv_eff,
213+
A::GPUSparseMatrixCSR,
214+
lc,
215+
uc,
216+
x,
217+
at_box;
218+
eq_atol::Real = 1.0e-12,
219+
)
220+
backend = common_backend(lv_eff, uv_eff, A, lc, uc, x, at_box)
221+
eq_tol = eltype(x)(eq_atol)
222+
kernel! = crossover_effective_bounds_csr!(backend)
223+
kernel!(
224+
lv_eff,
225+
uv_eff,
226+
A.rowptr,
227+
A.colval,
228+
A.nzval,
229+
lc,
230+
uc,
231+
x,
232+
at_box,
233+
eq_tol;
234+
ndrange = size(A, 1),
235+
)
117236
return lv_eff, uv_eff
118237
end
119238

239+
@kernel function crossover_effective_bounds_ell!(
240+
lv_eff::DenseVector{T},
241+
uv_eff::DenseVector{T},
242+
A_colval::AbstractMatrix{Ti},
243+
A_nzval::AbstractMatrix{T},
244+
lc::DenseVector{T},
245+
uc::DenseVector{T},
246+
x::DenseVector{T},
247+
at_box::DenseVector{Bool},
248+
eq_atol::T,
249+
) where {T, Ti}
250+
i = @index(Global, Linear)
251+
if abs(lc[i] - uc[i]) <= eq_atol
252+
slack = lc[i]
253+
free_j = zero(Ti)
254+
free_aij = zero(T)
255+
n_free = zero(Ti)
256+
for k in axes(A_colval, 2)
257+
j = A_colval[i, k]
258+
if j != zero(Ti)
259+
aij = A_nzval[i, k]
260+
if at_box[j]
261+
slack -= aij * x[j]
262+
else
263+
n_free += Ti(1)
264+
if n_free == Ti(1)
265+
free_j = j
266+
free_aij = aij
267+
end
268+
end
269+
end
270+
end
271+
if n_free == Ti(1)
272+
j = free_j
273+
aij = free_aij
274+
if aij > zero(T)
275+
implied = slack / aij
276+
old = Atomix.@atomic uv_eff[j]
277+
while implied < old
278+
(old2, success) = Atomix.@atomicreplace uv_eff[j] old => implied
279+
success && break
280+
old = old2
281+
end
282+
elseif aij < zero(T)
283+
implied = slack / aij
284+
old = Atomix.@atomic lv_eff[j]
285+
while implied > old
286+
(old2, success) = Atomix.@atomicreplace lv_eff[j] old => implied
287+
success && break
288+
old = old2
289+
end
290+
end
291+
end
292+
end
293+
end
294+
295+
function _crossover_effective_bounds!(
296+
lv_eff,
297+
uv_eff,
298+
A::GPUSparseMatrixELL,
299+
lc,
300+
uc,
301+
x,
302+
at_box;
303+
eq_atol::Real = 1.0e-12,
304+
)
305+
backend = common_backend(lv_eff, uv_eff, A, lc, uc, x, at_box)
306+
eq_tol = eltype(x)(eq_atol)
307+
kernel! = crossover_effective_bounds_ell!(backend)
308+
kernel!(
309+
lv_eff,
310+
uv_eff,
311+
A.colval,
312+
A.nzval,
313+
lc,
314+
uc,
315+
x,
316+
at_box,
317+
eq_tol;
318+
ndrange = size(A, 1),
319+
)
320+
return lv_eff, uv_eff
321+
end
322+
323+
function _crossover_effective_bounds!(
324+
lv_eff,
325+
uv_eff,
326+
A::GPUSparseMatrixCOO,
327+
lc,
328+
uc,
329+
x,
330+
at_box;
331+
eq_atol::Real = 1.0e-12,
332+
)
333+
backend = get_backend(x)
334+
A_csr = adapt(backend, GPUSparseMatrixCSR(SparseMatrixCSC(A)))
335+
return _crossover_effective_bounds!(lv_eff, uv_eff, A_csr, lc, uc, x, at_box; eq_atol)
336+
end
337+
120338
"""
121339
crossover_threshold!(x, lv, uv, params::CrossoverParameters)
122340
@@ -167,7 +385,7 @@ end
167385

168386
"""Count coordinates changed by crossover (exact compare)."""
169387
function crossover_n_changed(x_after, x_before)
170-
return count(i -> x_after[i] != x_before[i], eachindex(x_before))
388+
return sum(x_after .!= x_before)
171389
end
172390

173391
"""

test/components/crossover.jl

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,26 @@ end
4343
@test isinf(uv_eff[2])
4444
end
4545

46+
@testset "crossover_effective_bounds GPU" begin
47+
using Adapt
48+
using JLArrays: JLBackend, JLArray
49+
50+
milp = MILP(;
51+
c = [1.0, 2.0],
52+
lv = [0.0, 0.0],
53+
uv = [Inf, Inf],
54+
A = sparse([1.0 1.0]),
55+
lc = [1.0],
56+
uc = [1.0],
57+
)
58+
milp_gpu = adapt(JLBackend(), CoolPDLP.set_matrix_type(CoolPDLP.GPUSparseMatrixCSR, milp))
59+
x = adapt(JLBackend(), [1.0 - 1.0e-7, 0.0])
60+
lv_eff, uv_eff = CoolPDLP.crossover_effective_bounds(milp_gpu, x)
61+
@test lv_eff isa JLArray
62+
@test isapprox(Array(uv_eff)[1], 1.0; atol = 1.0e-12)
63+
@test isinf(Array(uv_eff)[2])
64+
end
65+
4666
@testset "crossover improves fraction_at_bounds" begin
4767
milp = MILP(;
4868
c = [1.0, 2.0],
@@ -132,7 +152,7 @@ end
132152
end
133153

134154
@testset "crossover rollback" begin
135-
@testset "crossover_kkt_acceptable edge cases" begin
155+
@testset "crossover_kkt_acceptable edge cases" begin
136156
err_ok = _kkt_err(1.0e-5, 1.0e-5, 1.0e-5)
137157
err_cert_fail = _kkt_err(2.0e-4, 1.0e-5, 1.0e-5)
138158
err_regress = _kkt_err(1.1e-5, 1.0e-5, 1.0e-5)

0 commit comments

Comments
 (0)