Skip to content

Commit 85b79ac

Browse files
committed
Phase-3: extend absolute offset to VariableBuffers (activation write-side)
Extend `_generatePromotedReferenceReset` to handle VariableBuffers (activations) in addition to ConstantBuffers. The base symbol is now taken from `_referenceName` (in scope via the closure arguments) instead of `unraveled._instance` (which may be out of scope for local activations). Works on IC100/ML1/MMN/AD with activation promotion enabled. Known limitation: CCT_2 activation promotion still fails for 32KB residual activations that fit in L1 as a single tile — in that case the L2->L1 schedule has no per-tile offset to fix, but the L3 closure would have spatially sliced the activation per L3 tile.
1 parent 904d745 commit 85b79ac

1 file changed

Lines changed: 16 additions & 6 deletions

File tree

Deeploy/TilingExtension/CodeTransformationPasses/TilingCodeGeneration.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -154,17 +154,20 @@ def _generateDmaTransferCalls(self, ctxt: NetworkContext, tensorName: str, trans
154154

155155
def _generatePromotedReferenceReset(self, ctxt: NetworkContext, tensorName: str, transfers: List[HyperRectangle],
156156
tileIdxVar: str, externalBuffer: VariableBuffer) -> Optional[CodeSnippet]:
157-
"""For promoted ConstantBuffers at self.externalMemory: emit an
158-
absolute offset assignment BEFORE the DMA so it overrides the +0
159-
initialization at the L1 closure body top (which resets every call)."""
157+
"""For promoted buffers (ConstantBuffer or VariableBuffer) at
158+
self.externalMemory: emit an absolute offset assignment BEFORE
159+
the DMA so it overrides the +0 initialization at the closure
160+
body top (which resets every call). Covers both the read side
161+
(L2→L1 for promoted constants) and the write side (L1→L2 for
162+
promoted activations)."""
160163
if len(transfers) <= 1:
161164
return None
162-
from Deeploy.DeeployTypes import ConstantBuffer
165+
from Deeploy.DeeployTypes import TransientBuffer
163166
try:
164167
unraveled = ctxt.unravelReference(externalBuffer)
165168
except Exception:
166169
unraveled = externalBuffer
167-
if not isinstance(unraveled, ConstantBuffer):
170+
if isinstance(unraveled, TransientBuffer):
168171
return None
169172
if getattr(unraveled, '_memoryLevel', None) != self.externalMemory:
170173
return None
@@ -183,12 +186,19 @@ def _generatePromotedReferenceReset(self, ctxt: NetworkContext, tensorName: str,
183186
offsetBuf = ctxt.lookup(fullName)
184187
except Exception:
185188
return None
189+
# Use externalBuffer._referenceName (the buffer the ref points to)
190+
# which IS available in the closure scope (passed via args or
191+
# declared locally). For ConstantBuffers this is the global static
192+
# PI_L2 symbol; for VariableBuffers (activations) it's the
193+
# closure-arg-passed pointer.
194+
baseRef = externalBuffer._referenceName if hasattr(externalBuffer, '_referenceName') else str(
195+
unraveled._instance)
186196
return CodeSnippet(
187197
self._absoluteOffsetReferenceTemplate, {
188198
"reference": externalBuffer.name,
189199
"tileIdxVar": tileIdxVar,
190200
"typeName": externalBuffer._type.referencedType.typeName,
191-
"baseSymbol": str(unraveled._instance),
201+
"baseSymbol": baseRef,
192202
"absoluteOffsets": offsetBuf.name,
193203
})
194204

0 commit comments

Comments
 (0)