Skip to content

Commit 3cb59d4

Browse files
LasNikasLasNikas
andauthored
Avoid atomic_cas in open boundaries and add gpu tests (#849)
* make activating particle thread save * fix assertion * fix gpu * use bool-vector * add gpu tests * fix unit test * fix * implement suggestions --------- Co-authored-by: LasNikas <niklas.nehe@web.de>
1 parent c9069f1 commit 3cb59d4

6 files changed

Lines changed: 142 additions & 95 deletions

File tree

src/general/buffer.jl

Lines changed: 5 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,7 @@ end
77

88
function SystemBuffer(active_size, buffer_size::Integer)
99
# Using a `BitVector` is not an option as writing to it is not thread-safe.
10-
# Also, to ensure thread-safe particle activation, we use an `atomic_cas` operation.
11-
# Thus, `active_particle` is defined as a `Vector{UInt32}` because CUDA.jl
12-
# does not support atomic operations on `Bool`.
13-
# https://github.com/JuliaGPU/CUDA.jl/blob/2cc9285676a4cd28d0846ca62f0300c56d281d38/src/device/intrinsics/atomics.jl#L243
14-
active_particle = vcat(fill(UInt32(true), active_size),
15-
fill(UInt32(false), buffer_size))
10+
active_particle = vcat(fill(true, active_size), fill(false, buffer_size))
1611
eachparticle = collect(eachindex(active_particle))
1712

1813
return SystemBuffer(active_particle, Ref(active_size), eachparticle, buffer_size)
@@ -49,9 +44,8 @@ end
4944

5045
# TODO: Parallelize (see https://github.com/trixi-framework/TrixiParticles.jl/issues/810)
5146
# Update the number of active particles and the active particle indices
52-
buffer.active_particle_count[] = sum(active_particle)
53-
buffer.eachparticle[1:buffer.active_particle_count[]] .= findall(x -> x == true,
54-
active_particle)
47+
buffer.active_particle_count[] = count(active_particle)
48+
buffer.eachparticle[1:buffer.active_particle_count[]] .= findall(active_particle)
5549

5650
return buffer
5751
end
@@ -64,29 +58,6 @@ end
6458
return view(buffer.eachparticle, 1:buffer.active_particle_count[])
6559
end
6660

67-
@inline function activate_next_particle(system)
68-
(; active_particle) = system.buffer
69-
70-
for particle in eachindex(active_particle)
71-
if PointNeighbors.Atomix.@atomic(active_particle[particle]) == false
72-
# After we go into this condition, another thread might still activate this particle
73-
# before we do. Therefore, we use an atomic swap, which activates the particle,
74-
# but also returns the old value.
75-
# If the old value is `true`, the particle was active before and we need to continue.
76-
# Note: This doesn't work with Metal.jl. No error is thrown, but the operation is simply ignored.
77-
# An atomic compare-and-swap operation is probably implemented for Metal.jl here:
78-
# https://github.com/JuliaGPU/Metal.jl/blob/caf299690aa52448ee72ffc5688939b157fc1ba2/src/device/intrinsics/atomics.jl#L42
79-
was_active = PointNeighbors.Atomix.@atomicswap active_particle[particle] = true
80-
81-
if was_active == false
82-
return particle
83-
end
84-
end
85-
end
86-
87-
error("No buffer particles available")
88-
end
89-
9061
@inline function deactivate_particle!(system, particle, u)
9162
(; active_particle) = system.buffer
9263

@@ -96,7 +67,8 @@ end
9667
u[dim, particle] = eltype(system)(1e16)
9768
end
9869

99-
# `activate_next_particle!` and `deactivate_particle!` are never called on the same buffer inside a kernel,
70+
# `deactivate_particle!` and `active_particle[particle] = true`
71+
# are never called on the same buffer inside a kernel,
10072
# so we don't have any race conditions on this `active_particle` vector.
10173
active_particle[particle] = false
10274

src/general/semidiscretization.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -964,6 +964,8 @@ function set_system_links(system::OpenBoundarySPHSystem, semi)
964964
system.density,
965965
system.volume,
966966
system.pressure,
967+
system.boundary_candidates,
968+
system.fluid_candidates,
967969
system.boundary_zone,
968970
system.reference_velocity,
969971
system.reference_pressure,

src/schemes/boundary/open_boundary/system.jl

Lines changed: 66 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ Open boundary system for in- and outflow particles.
3636
This is an experimental feature and may change in future releases.
3737
It is GPU-compatible (e.g., with CUDA.jl and AMDGPU.jl), but currently **not** supported with Metal.jl.
3838
"""
39-
struct OpenBoundarySPHSystem{BM, ELTYPE, NDIMS, IC, FS, FSI, ARRAY1D, BZ, RV,
39+
struct OpenBoundarySPHSystem{BM, ELTYPE, NDIMS, IC, FS, FSI, ARRAY1D, BC, FC, BZ, RV,
4040
RP, RD, B, UCU, C} <: System{NDIMS}
4141
boundary_model :: BM
4242
initial_condition :: IC
@@ -47,6 +47,8 @@ struct OpenBoundarySPHSystem{BM, ELTYPE, NDIMS, IC, FS, FSI, ARRAY1D, BZ, RV,
4747
density :: ARRAY1D # Array{ELTYPE, 1}: [particle]
4848
volume :: ARRAY1D # Array{ELTYPE, 1}: [particle]
4949
pressure :: ARRAY1D # Array{ELTYPE, 1}: [particle]
50+
boundary_candidates :: BC # Array{UInt32, 1}: [particle]
51+
fluid_candidates :: FC # Array{UInt32, 1}: [particle]
5052
boundary_zone :: BZ
5153
reference_velocity :: RV
5254
reference_pressure :: RP
@@ -58,18 +60,21 @@ end
5860

5961
function OpenBoundarySPHSystem(boundary_model, initial_condition, fluid_system,
6062
fluid_system_index, smoothing_length, mass, density, volume,
61-
pressure, boundary_zone, reference_velocity,
63+
pressure, boundary_candidates, fluid_candidates,
64+
boundary_zone, reference_velocity,
6265
reference_pressure, reference_density, buffer,
6366
update_callback_used, cache)
6467
OpenBoundarySPHSystem{typeof(boundary_model), eltype(mass), ndims(initial_condition),
6568
typeof(initial_condition), typeof(fluid_system),
66-
typeof(fluid_system_index), typeof(mass), typeof(boundary_zone),
67-
typeof(reference_velocity), typeof(reference_pressure),
68-
typeof(reference_density), typeof(buffer),
69-
typeof(update_callback_used),
69+
typeof(fluid_system_index), typeof(mass),
70+
typeof(boundary_candidates), typeof(fluid_candidates),
71+
typeof(boundary_zone), typeof(reference_velocity),
72+
typeof(reference_pressure), typeof(reference_density),
73+
typeof(buffer), typeof(update_callback_used),
7074
typeof(cache)}(boundary_model, initial_condition, fluid_system,
7175
fluid_system_index, smoothing_length, mass,
72-
density, volume, pressure, boundary_zone,
76+
density, volume, pressure, boundary_candidates,
77+
fluid_candidates, boundary_zone,
7378
reference_velocity, reference_pressure,
7479
reference_density, buffer, update_callback_used,
7580
cache)
@@ -153,9 +158,13 @@ function OpenBoundarySPHSystem(boundary_zone::BoundaryZone;
153158

154159
smoothing_length = initial_smoothing_length(fluid_system)
155160

161+
boundary_candidates = fill(false, nparticles(initial_condition))
162+
fluid_candidates = fill(false, nparticles(fluid_system))
163+
156164
return OpenBoundarySPHSystem(boundary_model, initial_condition, fluid_system,
157165
fluid_system_index, smoothing_length, mass, density,
158-
volume, pressure, boundary_zone, reference_velocity_,
166+
volume, pressure, boundary_candidates, fluid_candidates,
167+
boundary_zone, reference_velocity_,
159168
reference_pressure_, reference_density_, buffer,
160169
update_callback_used, cache)
161170
end
@@ -278,37 +287,67 @@ end
278287
update_open_boundary_eachstep!(system, v_ode, u_ode, semi, t) = system
279288

280289
function check_domain!(system, v, u, v_ode, u_ode, semi)
281-
(; boundary_zone) = system
290+
(; boundary_zone, boundary_candidates, fluid_candidates) = system
282291
fluid_system = corresponding_fluid_system(system, semi)
283292

284293
u_fluid = wrap_u(u_ode, fluid_system, semi)
285294
v_fluid = wrap_v(v_ode, fluid_system, semi)
286295

296+
boundary_candidates .= false
297+
287298
# Check the boundary particles whether they're leaving the boundary zone
288299
@threaded semi for particle in each_moving_particle(system)
289300
particle_coords = current_coords(u, system, particle)
290301

291302
# Check if boundary particle is outside the boundary zone
292303
if !is_in_boundary_zone(boundary_zone, particle_coords)
293-
convert_particle!(system, fluid_system, boundary_zone, particle,
294-
v, u, v_fluid, u_fluid)
304+
boundary_candidates[particle] = true
295305
end
296306
end
297307

308+
crossed_boundary_particles = findall(boundary_candidates)
309+
available_fluid_particles = findall(==(false), fluid_system.buffer.active_particle)
310+
311+
@assert length(crossed_boundary_particles)<=length(available_fluid_particles) "Not enough fluid buffer particles available"
312+
313+
# Convert open boundary particles in the fluid domain to fluid particles
314+
@threaded semi for i in eachindex(crossed_boundary_particles)
315+
particle = crossed_boundary_particles[i]
316+
particle_new = available_fluid_particles[i]
317+
318+
convert_particle!(system, fluid_system, boundary_zone, particle, particle_new,
319+
v, u, v_fluid, u_fluid)
320+
end
321+
298322
update_system_buffer!(system.buffer, semi)
299323
update_system_buffer!(fluid_system.buffer, semi)
300324

325+
fluid_candidates .= false
326+
301327
# Check the fluid particles whether they're entering the boundary zone
302328
@threaded semi for fluid_particle in each_moving_particle(fluid_system)
303329
fluid_coords = current_coords(u_fluid, fluid_system, fluid_particle)
304330

305331
# Check if fluid particle is in boundary zone
306332
if is_in_boundary_zone(boundary_zone, fluid_coords)
307-
convert_particle!(fluid_system, system, boundary_zone, fluid_particle,
308-
v, u, v_fluid, u_fluid)
333+
fluid_candidates[fluid_particle] = true
309334
end
310335
end
311336

337+
crossed_fluid_particles = findall(fluid_candidates)
338+
available_boundary_particles = findall(==(false), system.buffer.active_particle)
339+
340+
@assert length(crossed_fluid_particles)<=length(available_boundary_particles) "Not enough boundary buffer particles available"
341+
342+
# Convert fluid particles in the open boundary zone to open boundary particles
343+
@threaded semi for i in eachindex(crossed_fluid_particles)
344+
particle = crossed_fluid_particles[i]
345+
particle_new = available_boundary_particles[i]
346+
347+
convert_particle!(fluid_system, system, boundary_zone, particle, particle_new,
348+
v, u, v_fluid, u_fluid)
349+
end
350+
312351
update_system_buffer!(system.buffer, semi)
313352
update_system_buffer!(fluid_system.buffer, semi)
314353

@@ -317,21 +356,21 @@ end
317356

318357
# Outflow particle is outside the boundary zone
319358
@inline function convert_particle!(system::OpenBoundarySPHSystem, fluid_system,
320-
boundary_zone::BoundaryZone{OutFlow}, particle, v, u,
321-
v_fluid, u_fluid)
359+
boundary_zone::BoundaryZone{OutFlow}, particle,
360+
particle_new, v, u, v_fluid, u_fluid)
322361
deactivate_particle!(system, particle, u)
323362

324363
return system
325364
end
326365

327366
# Inflow particle is outside the boundary zone
328367
@inline function convert_particle!(system::OpenBoundarySPHSystem, fluid_system,
329-
boundary_zone::BoundaryZone{InFlow}, particle, v, u,
330-
v_fluid, u_fluid)
368+
boundary_zone::BoundaryZone{InFlow}, particle,
369+
particle_new, v, u, v_fluid, u_fluid)
331370
(; spanning_set) = boundary_zone
332371

333372
# Activate a new particle in simulation domain
334-
transfer_particle!(fluid_system, system, particle, v_fluid, u_fluid, v, u)
373+
transfer_particle!(fluid_system, system, particle, particle_new, v_fluid, u_fluid, v, u)
335374

336375
# Reset position of boundary particle
337376
for dim in 1:ndims(system)
@@ -343,8 +382,8 @@ end
343382

344383
# Buffer particle is outside the boundary zone
345384
@inline function convert_particle!(system::OpenBoundarySPHSystem, fluid_system,
346-
boundary_zone::BoundaryZone{BidirectionalFlow}, particle,
347-
v, u, v_fluid, u_fluid)
385+
boundary_zone::BoundaryZone{BidirectionalFlow},
386+
particle, particle_new, v, u, v_fluid, u_fluid)
348387
relative_position = current_coords(u, system, particle) - boundary_zone.zone_origin
349388

350389
# Check if particle is in- or outside the fluid domain.
@@ -356,7 +395,7 @@ end
356395
end
357396

358397
# Activate a new particle in simulation domain
359-
transfer_particle!(fluid_system, system, particle, v_fluid, u_fluid, v, u)
398+
transfer_particle!(fluid_system, system, particle, particle_new, v_fluid, u_fluid, v, u)
360399

361400
# Reset position of boundary particle
362401
for dim in 1:ndims(system)
@@ -368,19 +407,21 @@ end
368407

369408
# Fluid particle is in boundary zone
370409
@inline function convert_particle!(fluid_system::FluidSystem, system,
371-
boundary_zone, particle, v, u, v_fluid, u_fluid)
410+
boundary_zone, particle, particle_new,
411+
v, u, v_fluid, u_fluid)
372412
# Activate particle in boundary zone
373-
transfer_particle!(system, fluid_system, particle, v, u, v_fluid, u_fluid)
413+
transfer_particle!(system, fluid_system, particle, particle_new, v, u, v_fluid, u_fluid)
374414

375415
# Deactivate particle in interior domain
376416
deactivate_particle!(fluid_system, particle, u_fluid)
377417

378418
return fluid_system
379419
end
380420

381-
@inline function transfer_particle!(system_new, system_old, particle_old,
421+
@inline function transfer_particle!(system_new, system_old, particle_old, particle_new,
382422
v_new, u_new, v_old, u_old)
383-
particle_new = activate_next_particle(system_new)
423+
# Activate new particle
424+
system_new.buffer.active_particle[particle_new] = true
384425

385426
# Transfer densities
386427
density = current_density(v_old, system_old, particle_old)

0 commit comments

Comments
 (0)