@@ -722,16 +722,20 @@ def lower_aliases(aliases, meta, opt_maxpar, opt_block_temps):
722722 # which is what appears in `ispace`
723723 interval = interval .lift (i .stamp )
724724
725- if opt_block_temps :
726- sub_iterators .update (dmapper )
727- writeto .append (interval )
728- intervals [d ] = interval
729- elif d .is_Block :
730- pd = d .parent
731- writeto .append (interval .relaxed )
732- # The lower/upper bounds belong to the parent BlockDimension
733- intervals [d ] = interval .zero ()
734- intervals [pd ] = interval .switch (pd )
725+ # Construct the write-to space, which will define the shape of the
726+ # temporary, and the iteration intervals in which it will be
727+ # populated
728+ if d .is_Block :
729+ if opt_block_temps :
730+ sub_iterators .update (dmapper )
731+ writeto .append (interval )
732+ intervals [d ] = interval
733+ else :
734+ pd = d .parent
735+ writeto .append (interval .relaxed )
736+ # The lower/upper bounds belong to the parent BlockDimension
737+ intervals [d ] = interval .zero ()
738+ intervals [pd ] = interval .switch (pd )
735739 else :
736740 writeto .append (interval .relaxed )
737741 intervals [d ] = interval
@@ -1505,24 +1509,22 @@ def __repr__(self):
15051509
@property
def dimensions(self):
    """
    The Dimensions of the temporary.

    These are the `itdims` of the write-to space, returned unchanged.
    """
    write_space = self.writeto
    return write_space.itdims
15201513
@property
def indices(self):
    """
    The indices used to populate the temporary.

    The write-to space may be relaxed for storage sizing, while the
    temporary still has to be indexed with the active iteration Dimension.

    Each Dimension in the write-to space is replaced, via its `root`, by:

    * the deepest Block Dimension in `ispace` sharing that root, or
    * an AbstractSub Dimension in `ispace` sharing that root, which takes
      precedence over a Block Dimension with the same root.

    Dimensions with no such counterpart are returned as they are.
    """
    itdims = self.ispace.itdims

    # Only the innermost level of blocking is relevant for indexing
    blocks = [d for d in itdims if d.is_Block]
    innermost = max((d._depth for d in blocks), default=0)

    by_root = {}
    for d in blocks:
        if d._depth == innermost:
            by_root[d.root] = d

    # Applied last, so that AbstractSub Dimensions override Block ones
    for d in itdims:
        if d.is_AbstractSub:
            by_root[d.root] = d

    return tuple(by_root.get(d.root, d) for d in self.writeto.itdims)
15271529
15281530
0 commit comments