Skip to content

Commit f8b6629

Browse files
committed
Fix tests
1 parent 1e5dc85 commit f8b6629

2 files changed

Lines changed: 34 additions & 21 deletions

File tree

src/general/semidiscretization.jl

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -97,20 +97,25 @@ function Semidiscretization(systems::Union{AbstractSystem, Nothing}...;
9797
sizes_v = [v_nvariables(system) * n_integrated_particles(system)
9898
for system in systems]
9999

100-
# Align sizes to 64 bytes by adding padding if necessary.
101-
# This ensures that aligned loads can be used on the integration arrays, which can
102-
# significantly improve performance on GPUs. Performance benefits on CPUs remain
103-
# to be investigated.
100+
start_u = 1
101+
start_v = 1
102+
ranges_u_vec = Vector{UnitRange{Int}}(undef, length(systems))
103+
ranges_v_vec = Vector{UnitRange{Int}}(undef, length(systems))
104104
for i in eachindex(systems)
105+
ranges_u_vec[i] = start_u:(start_u + sizes_u[i] - 1)
106+
ranges_v_vec[i] = start_v:(start_v + sizes_v[i] - 1)
107+
108+
# Align sizes to 64 bytes by adding padding if necessary.
109+
# This ensures that aligned loads can be used on the integration arrays, which can
110+
# significantly improve performance on GPUs. Performance benefits on CPUs remain
111+
# to be investigated.
105112
block_size = div(64, sizeof(eltype(systems[i])))
106-
sizes_u[i] = div(sizes_u[i], block_size, RoundUp) * block_size
107-
sizes_v[i] = div(sizes_v[i], block_size, RoundUp) * block_size
113+
start_u += div(sizes_u[i], block_size, RoundUp) * block_size
114+
start_v += div(sizes_v[i], block_size, RoundUp) * block_size
108115
end
109116

110-
ranges_u = Tuple((sum(sizes_u[1:(i - 1)]) + 1):sum(sizes_u[1:i])
111-
for i in eachindex(sizes_u))
112-
ranges_v = Tuple((sum(sizes_v[1:(i - 1)]) + 1):sum(sizes_v[1:i])
113-
for i in eachindex(sizes_v))
117+
ranges_u = Tuple(ranges_u_vec)
118+
ranges_v = Tuple(ranges_v_vec)
114119

115120
# Create a n x n matrix of n neighborhood searches for each of the n systems.
116121
# We will need one neighborhood search for each pair of systems.
@@ -256,13 +261,13 @@ function semidiscretize(semi, tspan; reset_threads=true)
256261
Polyester.reset_threads!()
257262
end
258263

259-
sizes_u = (u_nvariables(system) * n_integrated_particles(system) for system in systems)
260-
sizes_v = (v_nvariables(system) * n_integrated_particles(system) for system in systems)
264+
size_u_ode = semi.ranges_u[end].stop
265+
size_v_ode = semi.ranges_v[end].stop
261266

262267
# Use either the specified backend, e.g., `CUDABackend` or `MetalBackend` or
263268
# use CPU vectors for all CPU backends.
264-
u0_ode_ = allocate(semi.parallelization_backend, cELTYPE, sum(sizes_u))
265-
v0_ode_ = allocate(semi.parallelization_backend, ELTYPE, sum(sizes_v))
269+
u0_ode_ = allocate(semi.parallelization_backend, cELTYPE, size_u_ode)
270+
v0_ode_ = allocate(semi.parallelization_backend, ELTYPE, size_v_ode)
266271

267272
if semi.parallelization_backend isa KernelAbstractions.GPU
268273
u0_ode = u0_ode_
@@ -277,6 +282,9 @@ function semidiscretize(semi, tspan; reset_threads=true)
277282
parallelization_backend=semi.parallelization_backend)
278283
end
279284

285+
u0_ode .= 0
286+
v0_ode .= 0
287+
280288
# Set initial condition
281289
foreach_system_wrapped(semi, v0_ode, u0_ode) do system, v0_system, u0_system
282290
write_u0!(u0_system, system)
@@ -367,8 +375,9 @@ end
367375
range = ranges_v[system_indices(system, semi)]
368376

369377
@boundscheck begin
370-
if length(range) != v_nvariables(system) * n_integrated_particles(system)
371-
throw(DimensionMismatch("`v_ode` range length $range_length does not match " *
378+
expected = v_nvariables(system) * n_integrated_particles(system)
379+
if length(range) != expected
380+
throw(DimensionMismatch("`v_ode` range length $(length(range)) does not match " *
372381
"expected number of entries $expected"))
373382
end
374383
end

test/general/semidiscretization.jl

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
system2 = System2()
99

1010
Base.eltype(::System1) = Float64
11+
Base.eltype(::System2) = Float64
1112
TrixiParticles.coordinates_eltype(::System1) = Float32
1213
TrixiParticles.u_nvariables(::System1) = 3
1314
TrixiParticles.u_nvariables(::System2) = 4
@@ -24,9 +25,12 @@
2425
@testset verbose=true "Constructor" begin
2526
semi = Semidiscretization(system1, system2, neighborhood_search=nothing)
2627

27-
# Verification
28-
@test semi.ranges_u == (1:6, 7:18)
29-
@test semi.ranges_v == (1:6, 7:12)
28+
# Verification: These are the ranges that we would expect based on system sizes:
29+
# semi.ranges_u == (1:6, 7:18)
30+
# semi.ranges_v == (1:6, 7:12)
31+
# Due to alignment to 64 bytes, the ranges are adjusted to be:
32+
@test semi.ranges_u == (1:6, 9:20)
33+
@test semi.ranges_v == (1:6, 9:14)
3034

3135
nhs = [TrixiParticles.TrivialNeighborhoodSearch{3}(search_radius=0.2,
3236
eachpoint=1:2)
@@ -152,8 +156,8 @@
152156

153157
semi = Semidiscretization(system1, system2, neighborhood_search=nothing)
154158

155-
dv_ode = zeros(3 * 2 + 2 * 3)
156-
du_ode = zeros(3 * 2 + 4 * 3)
159+
dv_ode = zeros(semi.ranges_v[end].stop)
160+
du_ode = zeros(semi.ranges_u[end].stop)
157161
u_ode = zero(du_ode)
158162

159163
v1 = [1.0 2.0

0 commit comments

Comments
 (0)