Skip to content

Commit a4e3058

Browse files
author
Charles Vielzeuf
committed
Expand crossover tests and fix CUDA effective bounds merge.
Add unit tests for implied-bound branches, backend agreement, pipeline consistency, and optional CUDA coverage. Replace in-kernel atomic bound updates with row scan plus host merge so CuSparseMatrixCSR matches CSC/JL results on equality rows.
1 parent 545f465 commit a4e3058

2 files changed

Lines changed: 338 additions & 60 deletions

File tree

src/components/crossover.jl

Lines changed: 110 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -152,19 +152,56 @@ function _crossover_effective_bounds!(
152152
return lv_eff, uv_eff
153153
end
154154

155-
@kernel function crossover_effective_bounds_csr!(
156-
lv_eff::DenseVector{T},
157-
uv_eff::DenseVector{T},
155+
"""
    _crossover_merge_row_bounds!(
        lv_eff, uv_eff, row_active, row_upper, row_j, row_implied
    )

Merge per-row implied bounds into the effective variable bounds, in place.

For every row `i` with `row_active[i]` true, `row_implied[i]` is applied to
variable `row_j[i]`: as an upper bound on `uv_eff` when `row_upper[i]` is true,
otherwise as a lower bound on `lv_eff`. A non-finite existing bound is replaced
by the implied value; a finite one is tightened with `min` (upper) or `max`
(lower). Rows are processed sequentially on host copies of the bound vectors,
and the results are written back with `copyto!`.

# Arguments
- `lv_eff`, `uv_eff`: lower/upper bound vectors, updated in place.
- `row_active`: `Bool` per row; inactive rows are skipped.
- `row_upper`: `Bool` per row; selects upper (`true`) or lower (`false`) bound.
- `row_j`: per-row index into `lv_eff` / `uv_eff`.
- `row_implied`: per-row implied bound value.

# Returns
The tuple `(lv_eff, uv_eff)`.

# Throws
- `DimensionMismatch` if the four `row_*` vectors differ in length.
- `BoundsError` if an active row's `row_j[i]` is not a valid index.
"""
function _crossover_merge_row_bounds!(
    lv_eff,
    uv_eff,
    row_active,
    row_upper,
    row_j,
    row_implied,
)
    n_rows = length(row_active)
    if !(length(row_upper) == length(row_j) == length(row_implied) == n_rows)
        throw(
            DimensionMismatch(
                "row_active, row_upper, row_j and row_implied must have equal length",
            ),
        )
    end
    # Nothing to merge: skip the round trip through host memory entirely.
    any(row_active) || return lv_eff, uv_eff

    lv_cpu = Vector(lv_eff)
    uv_cpu = Vector(uv_eff)
    # `row_j` holds data-derived indices, so bounds checks are kept on purpose.
    for i in eachindex(row_active)
        row_active[i] || continue
        j = row_j[i]
        implied = row_implied[i]
        if row_upper[i]
            current = uv_cpu[j]
            uv_cpu[j] = isfinite(current) ? min(current, implied) : implied
        else
            current = lv_cpu[j]
            lv_cpu[j] = isfinite(current) ? max(current, implied) : implied
        end
    end
    # Copy straight from the host buffers; no intermediate backend array is needed.
    copyto!(lv_eff, lv_cpu)
    copyto!(uv_eff, uv_cpu)
    return lv_eff, uv_eff
end
188+
189+
@kernel function crossover_effective_bounds_csr_scan!(
190+
row_active::DenseVector{UInt8},
191+
row_upper::DenseVector{UInt8},
192+
row_j::DenseVector{Ti},
193+
row_implied::DenseVector{T},
158194
A_rowptr::DenseVector{Ti},
159195
A_colval::DenseVector{Ti},
160196
A_nzval::DenseVector{T},
161197
lc::DenseVector{T},
162198
uc::DenseVector{T},
163199
x::DenseVector{T},
164-
at_box::DenseVector{Bool},
200+
at_box::DenseVector{UInt8},
165201
eq_atol::T,
166202
) where {T, Ti}
167203
i = @index(Global, Linear)
204+
row_active[i] = zero(UInt8)
168205
if abs(lc[i] - uc[i]) <= eq_atol
169206
slack = lc[i]
170207
free_j = zero(Ti)
@@ -173,7 +210,7 @@ end
173210
for k in A_rowptr[i]:(A_rowptr[i + Ti(1)] - Ti(1))
174211
j = A_colval[k]
175212
aij = A_nzval[k]
176-
if at_box[j]
213+
if at_box[j] != zero(UInt8)
177214
slack -= aij * x[j]
178215
else
179216
n_free += Ti(1)
@@ -184,24 +221,12 @@ end
184221
end
185222
end
186223
if n_free == Ti(1)
187-
j = free_j
188224
aij = free_aij
189-
if aij > zero(T)
190-
implied = slack / aij
191-
old = Atomix.@atomic uv_eff[j]
192-
while implied < old
193-
(old2, success) = Atomix.@atomicreplace uv_eff[j] old => implied
194-
success && break
195-
old = old2
196-
end
197-
elseif aij < zero(T)
198-
implied = slack / aij
199-
old = Atomix.@atomic lv_eff[j]
200-
while implied > old
201-
(old2, success) = Atomix.@atomicreplace lv_eff[j] old => implied
202-
success && break
203-
old = old2
204-
end
225+
if aij != zero(T)
226+
row_active[i] = one(UInt8)
227+
row_j[i] = free_j
228+
row_implied[i] = slack / aij
229+
row_upper[i] = aij > zero(T) ? one(UInt8) : zero(UInt8)
205230
end
206231
end
207232
end
@@ -217,37 +242,57 @@ function _crossover_effective_bounds!(
217242
at_box;
218243
eq_atol::Real = 1.0e-12,
219244
)
220-
backend = common_backend(lv_eff, uv_eff, A, lc, uc, x, at_box)
221-
eq_tol = eltype(x)(eq_atol)
222-
kernel! = crossover_effective_bounds_csr!(backend)
245+
T = eltype(x)
246+
Ti = eltype(A.rowptr)
247+
m = size(A, 1)
248+
backend = common_backend(A, lc, uc, x, at_box)
249+
eq_tol = T(eq_atol)
250+
at_box_u8 = adapt(backend, UInt8.(at_box))
251+
row_active = allocate(backend, UInt8, m)
252+
row_upper = allocate(backend, UInt8, m)
253+
row_j = allocate(backend, Ti, m)
254+
row_implied = allocate(backend, T, m)
255+
kernel! = crossover_effective_bounds_csr_scan!(backend)
223256
kernel!(
224-
lv_eff,
225-
uv_eff,
257+
row_active,
258+
row_upper,
259+
row_j,
260+
row_implied,
226261
A.rowptr,
227262
A.colval,
228263
A.nzval,
229264
lc,
230265
uc,
231266
x,
232-
at_box,
267+
at_box_u8,
233268
eq_tol;
234-
ndrange = size(A, 1),
269+
ndrange = m,
270+
)
271+
return _crossover_merge_row_bounds!(
272+
lv_eff,
273+
uv_eff,
274+
Vector(row_active) .!= 0,
275+
Vector(row_upper) .!= 0,
276+
Vector(row_j),
277+
Vector(row_implied),
235278
)
236-
return lv_eff, uv_eff
237279
end
238280

239-
@kernel function crossover_effective_bounds_ell!(
240-
lv_eff::DenseVector{T},
241-
uv_eff::DenseVector{T},
281+
@kernel function crossover_effective_bounds_ell_scan!(
282+
row_active::DenseVector{UInt8},
283+
row_upper::DenseVector{UInt8},
284+
row_j::DenseVector{Ti},
285+
row_implied::DenseVector{T},
242286
A_colval::AbstractMatrix{Ti},
243287
A_nzval::AbstractMatrix{T},
244288
lc::DenseVector{T},
245289
uc::DenseVector{T},
246290
x::DenseVector{T},
247-
at_box::DenseVector{Bool},
291+
at_box::DenseVector{UInt8},
248292
eq_atol::T,
249293
) where {T, Ti}
250294
i = @index(Global, Linear)
295+
row_active[i] = zero(UInt8)
251296
if abs(lc[i] - uc[i]) <= eq_atol
252297
slack = lc[i]
253298
free_j = zero(Ti)
@@ -257,7 +302,7 @@ end
257302
j = A_colval[i, k]
258303
if j != zero(Ti)
259304
aij = A_nzval[i, k]
260-
if at_box[j]
305+
if at_box[j] != zero(UInt8)
261306
slack -= aij * x[j]
262307
else
263308
n_free += Ti(1)
@@ -269,24 +314,12 @@ end
269314
end
270315
end
271316
if n_free == Ti(1)
272-
j = free_j
273317
aij = free_aij
274-
if aij > zero(T)
275-
implied = slack / aij
276-
old = Atomix.@atomic uv_eff[j]
277-
while implied < old
278-
(old2, success) = Atomix.@atomicreplace uv_eff[j] old => implied
279-
success && break
280-
old = old2
281-
end
282-
elseif aij < zero(T)
283-
implied = slack / aij
284-
old = Atomix.@atomic lv_eff[j]
285-
while implied > old
286-
(old2, success) = Atomix.@atomicreplace lv_eff[j] old => implied
287-
success && break
288-
old = old2
289-
end
318+
if aij != zero(T)
319+
row_active[i] = one(UInt8)
320+
row_j[i] = free_j
321+
row_implied[i] = slack / aij
322+
row_upper[i] = aij > zero(T) ? one(UInt8) : zero(UInt8)
290323
end
291324
end
292325
end
@@ -302,22 +335,39 @@ function _crossover_effective_bounds!(
302335
at_box;
303336
eq_atol::Real = 1.0e-12,
304337
)
305-
backend = common_backend(lv_eff, uv_eff, A, lc, uc, x, at_box)
306-
eq_tol = eltype(x)(eq_atol)
307-
kernel! = crossover_effective_bounds_ell!(backend)
338+
T = eltype(x)
339+
Ti = eltype(A.colval)
340+
m = size(A, 1)
341+
backend = common_backend(A, lc, uc, x, at_box)
342+
eq_tol = T(eq_atol)
343+
at_box_u8 = adapt(backend, UInt8.(at_box))
344+
row_active = allocate(backend, UInt8, m)
345+
row_upper = allocate(backend, UInt8, m)
346+
row_j = allocate(backend, Ti, m)
347+
row_implied = allocate(backend, T, m)
348+
kernel! = crossover_effective_bounds_ell_scan!(backend)
308349
kernel!(
309-
lv_eff,
310-
uv_eff,
350+
row_active,
351+
row_upper,
352+
row_j,
353+
row_implied,
311354
A.colval,
312355
A.nzval,
313356
lc,
314357
uc,
315358
x,
316-
at_box,
359+
at_box_u8,
317360
eq_tol;
318-
ndrange = size(A, 1),
361+
ndrange = m,
362+
)
363+
return _crossover_merge_row_bounds!(
364+
lv_eff,
365+
uv_eff,
366+
Vector(row_active) .!= 0,
367+
Vector(row_upper) .!= 0,
368+
Vector(row_j),
369+
Vector(row_implied),
319370
)
320-
return lv_eff, uv_eff
321371
end
322372

323373
function _crossover_effective_bounds!(

0 commit comments

Comments
 (0)