Skip to content

Commit 1027e08

Browse files
committed
Automatically add padding to make aligned loads possible.
1 parent c32bf84 commit 1027e08

2 files changed

Lines changed: 17 additions & 10 deletions

File tree

src/general/semidiscretization.jl

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -94,10 +94,21 @@ function Semidiscretization(systems::Union{AbstractSystem, Nothing}...;
9494

9595
sizes_u = [u_nvariables(system) * n_integrated_particles(system)
9696
for system in systems]
97-
ranges_u = Tuple((sum(sizes_u[1:(i - 1)]) + 1):sum(sizes_u[1:i])
98-
for i in eachindex(sizes_u))
9997
sizes_v = [v_nvariables(system) * n_integrated_particles(system)
10098
for system in systems]
99+
100+
# Pad sizes to a multiple of 64 bytes (`block_size` elements) if necessary.
101+
# This ensures that aligned loads can be used on the integration arrays, which can
102+
# significantly improve performance on GPUs. Performance benefits on CPUs remain
103+
# to be investigated.
104+
for i in eachindex(systems)
105+
block_size = div(64, sizeof(eltype(systems[i])))
106+
sizes_u[i] = div(sizes_u[i], block_size, RoundUp) * block_size
107+
sizes_v[i] = div(sizes_v[i], block_size, RoundUp) * block_size
108+
end
109+
110+
ranges_u = Tuple((sum(sizes_u[1:(i - 1)]) + 1):sum(sizes_u[1:i])
111+
for i in eachindex(sizes_u))
101112
ranges_v = Tuple((sum(sizes_v[1:(i - 1)]) + 1):sum(sizes_v[1:i])
102113
for i in eachindex(sizes_v))
103114

src/schemes/fluid/weakly_compressible_sph/rhs.jl

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -186,14 +186,10 @@ use_aligned_vrho_load(v, system, density_calculator) = false
186186
# The unaligned version `vload` does not produce wide load instructions on GPUs.
187187
function use_aligned_vrho_load(v::AbstractGPUArray, system, ::ContinuityDensity)
188188
if !can_use_aligned_load(v, 4)
189-
# If aligned loads are possible for the problem, but not allowed due to alignment,
190-
# we don't fall back to the non-SIMD version and throw an error instead.
191-
# This is likely a configuration error (see the error message below), and notifying
192-
# the user is better than silently falling back to slower loads and thus
193-
# non-deterministic performance.
194-
error("on GPUs in 3D, all WCSPH systems with `ContinuityDensity` must be the " *
195-
"first systems in the semidiscretization to ensure that their integration " *
196-
"arrays are aligned for SIMD loads.")
189+
# Aligned loads should always be possible on GPUs because the slices of `v_ode`
190+
# are aligned to 64 bytes in `Semidiscretization` and arrays on GPUs are always
191+
# aligned to full pages.
192+
error("illegal alignment of `v` integration array. Please report this issue.")
197193
end
198194

199195
return true

0 commit comments

Comments
 (0)