You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
iflength(indices) !=3@IncoherentArgumentError("incoherent arguments memopt in @parallel[_indices] <kernel>: optimization can only be applied in 3-D @parallel kernels and @parallel_indices kernels with three indices.") end
63
+
iflength(indices) ∉ (2, 3) @IncoherentArgumentError("incoherent arguments memopt in @parallel[_indices] <kernel>: optimization can only be applied in 2-D and 3-D @parallel kernels and @parallel_indices kernels.") end
64
+
if loopdim !=length(indices) @IncoherentArgumentError("incoherent arguments memopt in @parallel[_indices] <kernel>: two-index kernels require `loopdim=2` and three-index kernels require `loopdim=3`.") end
65
+
if loopdim ==2&&!isnothing(use_shmemhalos) @IncoherentArgumentError("incoherent arguments memopt in @parallel[_indices] <kernel>: shared-memory-related keywords are not supported for two-index memory-optimized kernels.") end
64
66
if optvars == (Symbol(""),)
65
67
optvars =Tuple(keys(readonlyvars))
66
68
else
@@ -83,7 +85,109 @@ function memopt(metadata_module::Module, is_parallel_kernel::Bool, caller::Modul
for A in optvars for (ox, regs) in regqueue_heads[A] for (oy, reg) in regs for loopentry = (loopentrys[A],)
140
+
)...
141
+
)
142
+
$((
143
+
:(
144
+
$(regs[oy]) =$(regs[oy+1])
145
+
)
146
+
for A in optvars for regs invalues(regqueue_tails[A]) for oy insort(keys(regs)) for (loopentry, oy_max) = ((loopentrys[A], oy_maxs[A]),) if oy<=oy_max-2
147
+
)...
148
+
)
149
+
$((
150
+
:(
151
+
$reg =$(regqueue_heads[A][ox][oy_max])
152
+
)
153
+
for A in optvars for (ox, regs) in regqueue_tails[A] for (oy, reg) in regs for (loopentry, oy_max) = ((loopentrys[A], oy_maxs[A]),) if oy==oy_max-1&&haskey(regqueue_heads[A], ox) &&haskey(regqueue_heads[A][ox], oy_max)
154
+
)...
155
+
)
156
+
end
157
+
))
158
+
)
159
+
$((wrap_loop(i, 1:loopend,
160
+
quote
161
+
$iy =$i +$loopoffset
162
+
if ($iy >$range_y_end) ParallelStencil.@return_nothing; end
for A in optvars for (ox, regs) in regqueue_heads[A] for (oy, reg) in regs for loopentry = (loopentrys[A],)
169
+
)...
170
+
)
171
+
$body
172
+
$((
173
+
:(
174
+
$(regs[oy]) =$(regs[oy+1])
175
+
)
176
+
for A in optvars for regs invalues(regqueue_tails[A]) for oy insort(keys(regs)) for (loopentry, oy_max) = ((loopentrys[A], oy_maxs[A]),) if oy<=oy_max-2
177
+
)...
178
+
)
179
+
$((
180
+
:(
181
+
$reg =$(regqueue_heads[A][ox][oy_max])
182
+
)
183
+
for A in optvars for (ox, regs) in regqueue_tails[A] for (oy, reg) in regs for (loopentry, oy_max) = ((loopentrys[A], oy_maxs[A]),) if oy==oy_max-1&&haskey(regqueue_heads[A], ox) &&haskey(regqueue_heads[A][ox], oy_max)
#TODO: this only leads to the correct result after two executions in a row, probably due to the same compiler bug as below. # loopsize = (oz_span_max<=0) ? 1 : loopsize # NOTE: if the stencilrange in z is only one point, no loop is needed.
@@ -541,6 +645,19 @@ function extract_offsets(caller::Module, body::Expr, indices::NTuple{N,<:Union{S
@ArgumentError("memopt: only loopdim=3 is currently supported.")
644
800
end
@@ -1020,11 +1176,19 @@ function wrap_loop(index::Symbol, range::UnitRange, block::Expr; unroll=false)
1020
1176
end
1021
1177
end
1022
1178
1023
-
functionstore_metadata(metadata_module::Module, is_parallel_kernel::Bool, caller::Module, offset_mins::Dict{Symbol, <:NTuple{3,Integer}}, offset_maxs::Dict{Symbol, <:NTuple{3,Integer}}, offsets::Dict{Symbol, Dict{Any, Any}}, optvars::NTuple{N,Symbol}where N, shmem_optvars::NTuple{M,Symbol}where M, use_any_shmem::Bool, loopdim::Integer, loopsize::Integer, optranges::Dict{Any, Any}, use_shmemhalos)
1179
+
functionstore_metadata(metadata_module::Module, is_parallel_kernel::Bool, caller::Module, offset_mins::Dict{Symbol, <:Tuple}, offset_maxs::Dict{Symbol, <:Tuple}, offsets::Dict{Symbol, Dict{Any, Any}}, optvars::NTuple{N,Symbol}where N, shmem_optvars::NTuple{M,Symbol}where M, use_any_shmem::Bool, loopdim::Integer, loopsize::Integer, optranges::Dict{Any, Any}, use_shmemhalos)
1024
1180
memopt =true
1025
1181
nonconst_metadata =get_nonconst_metadata(caller)
1026
-
stencilranges =NamedTuple(A => (offset_mins[A][1]:offset_maxs[A][1], offset_mins[A][2]:offset_maxs[A][2], offset_mins[A][3]:offset_maxs[A][3]) for A in optvars)
1027
-
use_shmemhalos =NamedTuple(A => use_shmemhalos[A] for A in optvars)
1182
+
stencilranges =NamedTuple(A =>begin
1183
+
offset_min = offset_mins[A]
1184
+
offset_max = offset_maxs[A]
1185
+
ndims =length(offset_min)
1186
+
x = offset_min[1]:offset_max[1]
1187
+
y = (ndims >1? offset_min[2] :0):(ndims >1? offset_max[2] :0)
1188
+
z = (ndims >2? offset_min[3] :0):(ndims >2? offset_max[3] :0)
1189
+
(x, y, z)
1190
+
endfor A in optvars)
1191
+
use_shmemhalos =NamedTuple(A =>get(use_shmemhalos, A, false) for A in optvars)
0 commit comments