Add excludeNamePatterns to PromoteTensorsToL2Greedy

runwangdl · runwangdl · commit 678f4ba11001 · 2026-04-16T14:50:08.000Z
Default-skip Conv-weight buffers ("_conv_layers_", "_conv_weight",
"_conv_bias"). Phase-1 bisection showed that promoting one specific
FP32 Conv weight (tokenizer_conv_layers_0_0_weight, 6912B) breaks
CCT_2_32_32_128 output equality 10/10 by itself, while all 21 other
small constants in the candidate set promote safely.

This is a partial defensive workaround — the AnomalyDetection int8
bug and the larger CCT proj-bias / positional-embedding cases hit the
same per-tile weight DMA codegen bug from a different angle and are
NOT fixed by this filter. See AI_AGENT Phase-0 doc §10.1 for the
full minimal-repro table and the proposed kernel-side fix.
diff --git a/Deeploy/MemoryLevelExtension/OptimizationPasses/MemoryLevelAnnotationPasses.py b/Deeploy/MemoryLevelExtension/OptimizationPasses/MemoryLevelAnnotationPasses.py
@@ -98,7 +98,8 @@ def __init__(self,
                  seed: int = 0,
                  metricsPath: Optional[str] = None,
                  setupCycles: int = DEFAULT_PROMOTE_SETUP_CYCLES,
-                 bandwidthBytesPerCycle: float = DEFAULT_PROMOTE_BANDWIDTH_BYTES_PER_CYCLE):
+                 bandwidthBytesPerCycle: float = DEFAULT_PROMOTE_BANDWIDTH_BYTES_PER_CYCLE,
+                 excludeNamePatterns: Optional[List[str]] = None):
         super().__init__()
         if strategy not in PROMOTE_STRATEGIES:
             raise ValueError(f"unknown promote strategy {strategy!r}; expected one of {PROMOTE_STRATEGIES}")
@@ -113,6 +114,15 @@ def __init__(self,
         self.metricsPath = metricsPath
         self.setupCycles = int(setupCycles)
         self.bandwidthBytesPerCycle = float(bandwidthBytesPerCycle)
+        # Buffers whose name contains any of these substrings are skipped.
+        # Default skips Conv-layer weights — see Phase-1 §10.1: promoting an
+        # FP32 Conv weight into a static PI_L2 array breaks the per-tile
+        # weight DMA path on Siracusa (CCT_2_32_32_128 fails 10/10 outputs
+        # with only that one weight promoted; all 21 other CCT constants
+        # promote correctly). Override with `excludeNamePatterns=[]` to
+        # opt back in once the kernel codegen is fixed.
+        self.excludeNamePatterns = list(excludeNamePatterns) if excludeNamePatterns is not None \
+            else ["_conv_layers_", "_conv_weight", "_conv_bias"]
         # Accumulated decisions across all apply() calls within one process.
         # Same tensor seen in later calls keeps its first-seen decision.
         self._decisions: dict = {}
@@ -173,6 +183,8 @@ def apply(self, ctxt: NetworkContext, graph: gs.Graph) -> Tuple[NetworkContext,
                 continue
             if self.onlyConstants and not isinstance(buf, ConstantBuffer):
                 continue
+            if any(pat in name for pat in self.excludeNamePatterns):
+                continue
             reuse = max(1, len(getattr(buf, "_users", [])))
             if reuse < self.minReuse:
                 continue