Skip to content

Commit 678f4ba

Browse files
committed
Add excludeNamePatterns to PromoteTensorsToL2Greedy
Default-skip Conv-weight buffers ("_conv_layers_", "_conv_weight", "_conv_bias"). Phase-1 bisection showed that promoting one specific FP32 Conv weight (tokenizer_conv_layers_0_0_weight, 6912B) breaks CCT_2_32_32_128 output equality 10/10 by itself, while all 21 other small constants in the candidate set promote safely. This is a partial defensive workaround — the AnomalyDetection int8 bug and the larger CCT proj-bias / positional-embedding cases hit the same per-tile weight DMA codegen bug from a different angle and are NOT fixed by this filter. See AI_AGENT Phase-0 doc §10.1 for the full minimal-repro table and the proposed kernel-side fix.
1 parent 95a097c commit 678f4ba

1 file changed

Lines changed: 13 additions & 1 deletion

File tree

Deeploy/MemoryLevelExtension/OptimizationPasses/MemoryLevelAnnotationPasses.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,8 @@ def __init__(self,
9898
seed: int = 0,
9999
metricsPath: Optional[str] = None,
100100
setupCycles: int = DEFAULT_PROMOTE_SETUP_CYCLES,
101-
bandwidthBytesPerCycle: float = DEFAULT_PROMOTE_BANDWIDTH_BYTES_PER_CYCLE):
101+
bandwidthBytesPerCycle: float = DEFAULT_PROMOTE_BANDWIDTH_BYTES_PER_CYCLE,
102+
excludeNamePatterns: Optional[List[str]] = None):
102103
super().__init__()
103104
if strategy not in PROMOTE_STRATEGIES:
104105
raise ValueError(f"unknown promote strategy {strategy!r}; expected one of {PROMOTE_STRATEGIES}")
@@ -113,6 +114,15 @@ def __init__(self,
113114
self.metricsPath = metricsPath
114115
self.setupCycles = int(setupCycles)
115116
self.bandwidthBytesPerCycle = float(bandwidthBytesPerCycle)
117+
# Buffers whose name contains any of these substrings are skipped.
118+
# Default skips Conv-layer weights — see Phase-1 §10.1: promoting an
119+
# FP32 Conv weight into a static PI_L2 array breaks the per-tile
120+
# weight DMA path on Siracusa (CCT_2_32_32_128 fails 10/10 outputs
121+
# with only that one weight promoted; all 21 other CCT constants
122+
# promote correctly). Override with `excludeNamePatterns=[]` to
123+
# opt back in once the kernel codegen is fixed.
124+
self.excludeNamePatterns = list(excludeNamePatterns) if excludeNamePatterns is not None \
125+
else ["_conv_layers_", "_conv_weight", "_conv_bias"]
116126
# Accumulated decisions across all apply() calls within one process.
117127
# Same tensor seen in later calls keeps its first-seen decision.
118128
self._decisions: dict = {}
@@ -173,6 +183,8 @@ def apply(self, ctxt: NetworkContext, graph: gs.Graph) -> Tuple[NetworkContext,
173183
continue
174184
if self.onlyConstants and not isinstance(buf, ConstantBuffer):
175185
continue
186+
if any(pat in name for pat in self.excludeNamePatterns):
187+
continue
176188
reuse = max(1, len(getattr(buf, "_users", [])))
177189
if reuse < self.minReuse:
178190
continue

0 commit comments

Comments
 (0)