Add selection strategies and per-decision metrics to promotion pass

runwangdl · runwangdl · commit 515daea33f86 · 2026-04-16T13:53:53.000Z
Extend PromoteTensorsToL2Greedy with a `strategy` option (greedy-score,
knapsack-ratio, random, largest, smallest) and a `seed` used only by the
random strategy. Each apply() call accumulates per-tensor decisions on
the pass instance, so multi-subgraph runs produce a single coherent JSON
file written to --promoteMetricsPath.

The JSON records bytes_promoted, dma_saved (the sum of reuse*size over
the promoted tensors), and the full per-tensor decision lists. It is used
by the offline strategy comparison driver to quantify greedy vs. random
selection on tight L2 budgets.
diff --git a/Deeploy/MemoryLevelExtension/OptimizationPasses/MemoryLevelAnnotationPasses.py b/Deeploy/MemoryLevelExtension/OptimizationPasses/MemoryLevelAnnotationPasses.py
@@ -2,7 +2,10 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from typing import List, Tuple
+import json
+import os
+import random
+from typing import List, Optional, Tuple
 
 import numpy as np
 import onnx_graphsurgeon as gs
@@ -12,6 +15,8 @@
 from Deeploy.Logging import DEFAULT_LOGGER as log
 from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy
 
+PROMOTE_STRATEGIES = ("greedy-score", "knapsack-ratio", "random", "largest", "smallest")
+
 
 class AnnotateDefaultMemoryLevel(SequentialPass):
 
@@ -59,15 +64,21 @@ def _bufferSizeBytes(buffer: VariableBuffer) -> int:
 
 class PromoteTensorsToL2Greedy(SequentialPass):
     """Promote global tensors from a slower memory level (e.g. L3) to L2 by
-    greedy score = reuse_count * size_bytes, subject to an L2 byte budget.
+    one of several selection strategies, subject to an L2 byte budget.
 
-    The pass walks `ctxt.globalObjects`, considers any buffer whose current
-    `_memoryLevel` matches `sourceLevel`, ranks them by descending score, and
-    flips `_memoryLevel` to `targetLevel` while accumulated bytes fit in
-    (targetLevel.size - headroomBytes - already-occupied-target-bytes).
+    Strategies (`strategy=`):
+      - greedy-score    : sort by -(reuse * size). Bias toward high-traffic items.
+      - knapsack-ratio  : sort by -reuse (= -value/weight). Standard 0/1 knapsack
+                          greedy heuristic; favors small high-reuse tensors.
+      - random          : shuffle deterministically with `seed`.
+      - largest         : sort by -size.
+      - smallest        : sort by +size (pack many small items).
 
     The downstream MiniMalloc / arena allocator is the ground-truth fit
     check; this pass is only a heuristic placement decision.
+
+    If `metricsPath` is given, a JSON summary of the decision (per-tensor
+    decisions, totals) is written there.
     """
 
     def __init__(self,
@@ -76,14 +87,42 @@ def __init__(self,
                  targetLevel: str = "L2",
                  headroomBytes: int = 64000,
                  minReuse: int = 1,
-                 onlyConstants: bool = False):
+                 onlyConstants: bool = False,
+                 strategy: str = "greedy-score",
+                 seed: int = 0,
+                 metricsPath: Optional[str] = None):
         super().__init__()
+        if strategy not in PROMOTE_STRATEGIES:
+            raise ValueError(f"unknown promote strategy {strategy!r}; expected one of {PROMOTE_STRATEGIES}")
         self.memoryHierarchy = memoryHierarchy
         self.sourceLevel = sourceLevel
         self.targetLevel = targetLevel
         self.headroomBytes = headroomBytes
         self.minReuse = minReuse
         self.onlyConstants = onlyConstants
+        self.strategy = strategy
+        self.seed = seed
+        self.metricsPath = metricsPath
+        # Decisions accumulated across all apply() calls on this pass instance.
+        # A tensor seen again in a later call keeps its first-seen decision.
+        self._decisions: dict = {}
+        self._invocations: int = 0
+
+    def _orderCandidates(self, candidates: List[Tuple[int, int, int, str, VariableBuffer]]) -> List:
+        # candidate tuple = (score, size, reuse, name, buf)
+        if self.strategy == "greedy-score":
+            return sorted(candidates, key = lambda x: (-x[0], x[3]))
+        if self.strategy == "knapsack-ratio":
+            return sorted(candidates, key = lambda x: (-x[2], x[3]))
+        if self.strategy == "largest":
+            return sorted(candidates, key = lambda x: (-x[1], x[3]))
+        if self.strategy == "smallest":
+            return sorted(candidates, key = lambda x: (x[1], x[3]))
+        if self.strategy == "random":
+            ordered = list(candidates)
+            random.Random(self.seed).shuffle(ordered)
+            return ordered
+        raise AssertionError(f"unhandled strategy {self.strategy}")
 
     def apply(self, ctxt: NetworkContext, graph: gs.Graph) -> Tuple[NetworkContext, gs.Graph]:
         target = self.memoryHierarchy.memoryLevels.get(self.targetLevel)
@@ -103,7 +142,7 @@ def apply(self, ctxt: NetworkContext, graph: gs.Graph) -> Tuple[NetworkContext,
                 used += _bufferSizeBytes(buf)
 
         from Deeploy.DeeployTypes import ConstantBuffer
-        candidates: List[Tuple[int, int, str, VariableBuffer]] = []
+        candidates: List[Tuple[int, int, int, str, VariableBuffer]] = []
         for name, buf in ctxt.globalObjects.items():
             if not isinstance(buf, VariableBuffer):
                 continue
@@ -122,26 +161,83 @@ def apply(self, ctxt: NetworkContext, graph: gs.Graph) -> Tuple[NetworkContext,
             if size <= 0:
                 continue
             score = reuse * size
-            candidates.append((score, size, name, buf))
+            candidates.append((score, size, reuse, name, buf))
 
-        candidates.sort(key = lambda x: (-x[0], x[2]))
+        ordered = self._orderCandidates(candidates)
 
-        promoted: List[Tuple[str, int]] = []
-        skipped: List[Tuple[str, int]] = []
-        for score, size, name, buf in candidates:
+        promoted: List[Tuple[str, int, int, int]] = []
+        skipped: List[Tuple[str, int, int, int]] = []
+        for score, size, reuse, name, buf in ordered:
             if used + size <= budget:
                 buf._memoryLevel = self.targetLevel
                 used += size
-                promoted.append((name, size))
+                promoted.append((name, size, reuse, score))
             else:
-                skipped.append((name, size))
-
-        log.info(f"[PromoteTensorsToL2Greedy] {self.sourceLevel}->{self.targetLevel}: "
-                 f"promoted {len(promoted)} tensor(s), {used}/{budget} bytes used "
-                 f"(headroom {self.headroomBytes}), {len(skipped)} skipped for capacity")
-        for name, size in promoted:
-            log.debug(f"  + promote {name} ({size} B) -> {self.targetLevel}")
-        for name, size in skipped:
-            log.debug(f"  - skip    {name} ({size} B) stays at {self.sourceLevel}")
+                skipped.append((name, size, reuse, score))
+
+        self._invocations += 1
+        for name, size, reuse, score in promoted:
+            self._decisions.setdefault(name, {
+                "name": name,
+                "size": size,
+                "reuse": reuse,
+                "score": score,
+                "decision": "promoted",
+            })
+        for name, size, reuse, score in skipped:
+            self._decisions.setdefault(name, {
+                "name": name,
+                "size": size,
+                "reuse": reuse,
+                "score": score,
+                "decision": "skipped",
+            })
+
+        dmaSaved = sum(s for _, _, _, s in promoted)
+        totalCandidates = len(candidates)
+        totalCandidateBytes = sum(c[1] for c in candidates)
+        totalCandidateScore = sum(c[0] for c in candidates)
+
+        log.info(f"[PromoteTensorsToL2Greedy] strategy={self.strategy} seed={self.seed} "
+                 f"{self.sourceLevel}->{self.targetLevel} call#{self._invocations}: "
+                 f"{len(promoted)}/{totalCandidates} promoted, "
+                 f"{used}/{budget} B used (headroom {self.headroomBytes}), "
+                 f"dma_saved={dmaSaved} (of total {totalCandidateScore})")
+
+        if self.metricsPath:
+            try:
+                os.makedirs(os.path.dirname(self.metricsPath), exist_ok = True)
+            except (FileNotFoundError, OSError):
+                pass
+            promotedList = [d for d in self._decisions.values() if d["decision"] == "promoted"]
+            skippedList = [d for d in self._decisions.values() if d["decision"] == "skipped"]
+            cumulativeBytes = sum(d["size"] for d in promotedList)
+            cumulativeScore = sum(d["score"] for d in promotedList)
+            allBytes = sum(d["size"] for d in self._decisions.values())
+            allScore = sum(d["score"] for d in self._decisions.values())
+            payload = {
+                "strategy": self.strategy,
+                "seed": self.seed,
+                "source_level": self.sourceLevel,
+                "target_level": self.targetLevel,
+                "target_capacity": target.size,
+                "headroom_bytes": self.headroomBytes,
+                "budget": budget,
+                "bytes_used_in_target": used,
+                "invocations": self._invocations,
+                "n_candidates": len(self._decisions),
+                "candidates_total_bytes": allBytes,
+                "candidates_total_score": allScore,
+                "n_promoted": len(promotedList),
+                "bytes_promoted": cumulativeBytes,
+                "dma_saved": cumulativeScore,
+                "promoted": promotedList,
+                "skipped": skippedList,
+            }
+            try:
+                with open(self.metricsPath, "w") as fh:
+                    json.dump(payload, fh, indent = 2)
+            except OSError as exc:
+                log.warning(f"[PromoteTensorsToL2Greedy] could not write metrics to {self.metricsPath}: {exc}")
 
         return ctxt, graph
diff --git a/DeeployTest/testMVP.py b/DeeployTest/testMVP.py
@@ -119,7 +119,10 @@ def setupDeployer(graph: gs.Graph, memoryHierarchy: MemoryHierarchy, defaultTarg
                                      sourceLevel = "L3",
                                      targetLevel = "L2",
                                      headroomBytes = args.promoteHeadroom,
-                                     onlyConstants = args.promoteOnlyConstants))
+                                     onlyConstants = args.promoteOnlyConstants,
+                                     strategy = args.promoteStrategy,
+                                     seed = args.promoteSeed,
+                                     metricsPath = args.promoteMetricsPath))
 
     # Make the deployer memory-level aware
     deployer = MemoryDeployerWrapper(deployer, memoryLevelAnnotationPasses)
@@ -229,6 +232,16 @@ def setupDeployer(graph: gs.Graph, memoryHierarchy: MemoryHierarchy, defaultTarg
                         action = 'store_true',
                         default = False,
                         help = 'Restrict promotion to ConstantBuffer (weights/biases) only.\n')
+    parser.add_argument('--promoteStrategy',
+                        type = str,
+                        default = "greedy-score",
+                        choices = ["greedy-score", "knapsack-ratio", "random", "largest", "smallest"],
+                        help = 'Selection strategy for tensor promotion.\n')
+    parser.add_argument('--promoteSeed', type = int, default = 0, help = 'RNG seed for --promoteStrategy=random.\n')
+    parser.add_argument('--promoteMetricsPath',
+                        type = str,
+                        default = None,
+                        help = 'If set, write JSON metrics for the promotion decision to this path.\n')
     parser.add_argument(
         "--cores",
         type = int,