Skip to content

Commit 515daea

Browse files
committed
Add selection strategies and per-decision metrics to promotion pass
Extend PromoteTensorsToL2Greedy with strategy={greedy-score, knapsack-ratio, random, largest, smallest} and a seed for the random strategy. Each apply() call accumulates per-tensor decisions on the pass instance, so multi-subgraph runs produce a single coherent JSON file at --promoteMetricsPath; the file is rewritten after every call with the cumulative state. The JSON records bytes_promoted, dma_saved (the sum of reuse * size over promoted tensors), and the full per-tensor decision lists. It is used by the offline strategy-comparison driver to quantify greedy vs. random selection on tight L2 budgets.
1 parent 6ad6bfa commit 515daea

2 files changed

Lines changed: 133 additions & 24 deletions

File tree

Deeploy/MemoryLevelExtension/OptimizationPasses/MemoryLevelAnnotationPasses.py

Lines changed: 119 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5-
from typing import List, Tuple
5+
import json
6+
import os
7+
import random
8+
from typing import List, Optional, Tuple
69

710
import numpy as np
811
import onnx_graphsurgeon as gs
@@ -12,6 +15,8 @@
1215
from Deeploy.Logging import DEFAULT_LOGGER as log
1316
from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy
1417

18+
# Names accepted by PromoteTensorsToL2Greedy(strategy=...); its __init__ raises
# ValueError for anything not listed here.
# NOTE(review): the --promoteStrategy `choices` list in DeeployTest/testMVP.py
# repeats these literals by hand -- keep the two in sync when adding a strategy.
PROMOTE_STRATEGIES = ("greedy-score", "knapsack-ratio", "random", "largest", "smallest")
19+
1520

1621
class AnnotateDefaultMemoryLevel(SequentialPass):
1722

@@ -59,15 +64,21 @@ def _bufferSizeBytes(buffer: VariableBuffer) -> int:
5964

6065
class PromoteTensorsToL2Greedy(SequentialPass):
6166
"""Promote global tensors from a slower memory level (e.g. L3) to L2 by
62-
greedy score = reuse_count * size_bytes, subject to an L2 byte budget.
67+
one of several selection strategies, subject to an L2 byte budget.
6368
64-
The pass walks `ctxt.globalObjects`, considers any buffer whose current
65-
`_memoryLevel` matches `sourceLevel`, ranks them by descending score, and
66-
flips `_memoryLevel` to `targetLevel` while accumulated bytes fit in
67-
(targetLevel.size - headroomBytes - already-occupied-target-bytes).
69+
Strategies (`strategy=`):
70+
- greedy-score : sort by -(reuse * size). Bias toward high-traffic items.
71+
- knapsack-ratio : sort by -reuse (= -value/weight). Standard 0/1 knapsack
72+
greedy heuristic; favors small high-reuse tensors.
73+
- random : shuffle deterministically with `seed`.
74+
- largest : sort by -size.
75+
- smallest : sort by +size (pack many small items).
6876
6977
The downstream MiniMalloc / arena allocator is the ground-truth fit
7078
check; this pass is only a heuristic placement decision.
79+
80+
If `metricsPath` is given, a JSON summary of the decision (per-tensor
81+
decisions, totals) is written there.
7182
"""
7283

7384
def __init__(self,
@@ -76,14 +87,42 @@ def __init__(self,
7687
targetLevel: str = "L2",
7788
headroomBytes: int = 64000,
7889
minReuse: int = 1,
79-
onlyConstants: bool = False):
90+
onlyConstants: bool = False,
91+
strategy: str = "greedy-score",
92+
seed: int = 0,
93+
metricsPath: Optional[str] = None):
8094
super().__init__()
95+
if strategy not in PROMOTE_STRATEGIES:
96+
raise ValueError(f"unknown promote strategy {strategy!r}; expected one of {PROMOTE_STRATEGIES}")
8197
self.memoryHierarchy = memoryHierarchy
8298
self.sourceLevel = sourceLevel
8399
self.targetLevel = targetLevel
84100
self.headroomBytes = headroomBytes
85101
self.minReuse = minReuse
86102
self.onlyConstants = onlyConstants
103+
self.strategy = strategy
104+
self.seed = seed
105+
self.metricsPath = metricsPath
106+
# Accumulated decisions across all apply() calls within one process.
107+
# Same tensor seen in later calls keeps its first-seen decision.
108+
self._decisions: dict = {}
109+
self._invocations: int = 0
110+
111+
def _orderCandidates(self, candidates: List[Tuple[int, int, int, str, VariableBuffer]]) -> List:
112+
# candidate tuple = (score, size, reuse, name, buf)
113+
if self.strategy == "greedy-score":
114+
return sorted(candidates, key = lambda x: (-x[0], x[3]))
115+
if self.strategy == "knapsack-ratio":
116+
return sorted(candidates, key = lambda x: (-x[2], x[3]))
117+
if self.strategy == "largest":
118+
return sorted(candidates, key = lambda x: (-x[1], x[3]))
119+
if self.strategy == "smallest":
120+
return sorted(candidates, key = lambda x: (x[1], x[3]))
121+
if self.strategy == "random":
122+
ordered = list(candidates)
123+
random.Random(self.seed).shuffle(ordered)
124+
return ordered
125+
raise AssertionError(f"unhandled strategy {self.strategy}")
87126

88127
def apply(self, ctxt: NetworkContext, graph: gs.Graph) -> Tuple[NetworkContext, gs.Graph]:
89128
target = self.memoryHierarchy.memoryLevels.get(self.targetLevel)
@@ -103,7 +142,7 @@ def apply(self, ctxt: NetworkContext, graph: gs.Graph) -> Tuple[NetworkContext,
103142
used += _bufferSizeBytes(buf)
104143

105144
from Deeploy.DeeployTypes import ConstantBuffer
106-
candidates: List[Tuple[int, int, str, VariableBuffer]] = []
145+
candidates: List[Tuple[int, int, int, str, VariableBuffer]] = []
107146
for name, buf in ctxt.globalObjects.items():
108147
if not isinstance(buf, VariableBuffer):
109148
continue
@@ -122,26 +161,83 @@ def apply(self, ctxt: NetworkContext, graph: gs.Graph) -> Tuple[NetworkContext,
122161
if size <= 0:
123162
continue
124163
score = reuse * size
125-
candidates.append((score, size, name, buf))
164+
candidates.append((score, size, reuse, name, buf))
126165

127-
candidates.sort(key = lambda x: (-x[0], x[2]))
166+
ordered = self._orderCandidates(candidates)
128167

129-
promoted: List[Tuple[str, int]] = []
130-
skipped: List[Tuple[str, int]] = []
131-
for score, size, name, buf in candidates:
168+
promoted: List[Tuple[str, int, int, int]] = []
169+
skipped: List[Tuple[str, int, int, int]] = []
170+
for score, size, reuse, name, buf in ordered:
132171
if used + size <= budget:
133172
buf._memoryLevel = self.targetLevel
134173
used += size
135-
promoted.append((name, size))
174+
promoted.append((name, size, reuse, score))
136175
else:
137-
skipped.append((name, size))
138-
139-
log.info(f"[PromoteTensorsToL2Greedy] {self.sourceLevel}->{self.targetLevel}: "
140-
f"promoted {len(promoted)} tensor(s), {used}/{budget} bytes used "
141-
f"(headroom {self.headroomBytes}), {len(skipped)} skipped for capacity")
142-
for name, size in promoted:
143-
log.debug(f" + promote {name} ({size} B) -> {self.targetLevel}")
144-
for name, size in skipped:
145-
log.debug(f" - skip {name} ({size} B) stays at {self.sourceLevel}")
176+
skipped.append((name, size, reuse, score))
177+
178+
self._invocations += 1
179+
for name, size, reuse, score in promoted:
180+
self._decisions.setdefault(name, {
181+
"name": name,
182+
"size": size,
183+
"reuse": reuse,
184+
"score": score,
185+
"decision": "promoted",
186+
})
187+
for name, size, reuse, score in skipped:
188+
self._decisions.setdefault(name, {
189+
"name": name,
190+
"size": size,
191+
"reuse": reuse,
192+
"score": score,
193+
"decision": "skipped",
194+
})
195+
196+
dmaSaved = sum(s for _, _, _, s in promoted)
197+
totalCandidates = len(candidates)
198+
totalCandidateBytes = sum(c[1] for c in candidates)
199+
totalCandidateScore = sum(c[0] for c in candidates)
200+
201+
log.info(f"[PromoteTensorsToL2Greedy] strategy={self.strategy} seed={self.seed} "
202+
f"{self.sourceLevel}->{self.targetLevel} call#{self._invocations}: "
203+
f"{len(promoted)}/{totalCandidates} promoted, "
204+
f"{used}/{budget} B used (headroom {self.headroomBytes}), "
205+
f"dma_saved={dmaSaved} (of total {totalCandidateScore})")
206+
207+
if self.metricsPath:
208+
try:
209+
os.makedirs(os.path.dirname(self.metricsPath), exist_ok = True)
210+
except (FileNotFoundError, OSError):
211+
pass
212+
promotedList = [d for d in self._decisions.values() if d["decision"] == "promoted"]
213+
skippedList = [d for d in self._decisions.values() if d["decision"] == "skipped"]
214+
cumulativeBytes = sum(d["size"] for d in promotedList)
215+
cumulativeScore = sum(d["score"] for d in promotedList)
216+
allBytes = sum(d["size"] for d in self._decisions.values())
217+
allScore = sum(d["score"] for d in self._decisions.values())
218+
payload = {
219+
"strategy": self.strategy,
220+
"seed": self.seed,
221+
"source_level": self.sourceLevel,
222+
"target_level": self.targetLevel,
223+
"target_capacity": target.size,
224+
"headroom_bytes": self.headroomBytes,
225+
"budget": budget,
226+
"bytes_used_in_target": used,
227+
"invocations": self._invocations,
228+
"n_candidates": len(self._decisions),
229+
"candidates_total_bytes": allBytes,
230+
"candidates_total_score": allScore,
231+
"n_promoted": len(promotedList),
232+
"bytes_promoted": cumulativeBytes,
233+
"dma_saved": cumulativeScore,
234+
"promoted": promotedList,
235+
"skipped": skippedList,
236+
}
237+
try:
238+
with open(self.metricsPath, "w") as fh:
239+
json.dump(payload, fh, indent = 2)
240+
except OSError as exc:
241+
log.warning(f"[PromoteTensorsToL2Greedy] could not write metrics to {self.metricsPath}: {exc}")
146242

147243
return ctxt, graph

DeeployTest/testMVP.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,10 @@ def setupDeployer(graph: gs.Graph, memoryHierarchy: MemoryHierarchy, defaultTarg
119119
sourceLevel = "L3",
120120
targetLevel = "L2",
121121
headroomBytes = args.promoteHeadroom,
122-
onlyConstants = args.promoteOnlyConstants))
122+
onlyConstants = args.promoteOnlyConstants,
123+
strategy = args.promoteStrategy,
124+
seed = args.promoteSeed,
125+
metricsPath = args.promoteMetricsPath))
123126

124127
# Make the deployer memory-level aware
125128
deployer = MemoryDeployerWrapper(deployer, memoryLevelAnnotationPasses)
@@ -229,6 +232,16 @@ def setupDeployer(graph: gs.Graph, memoryHierarchy: MemoryHierarchy, defaultTarg
229232
action = 'store_true',
230233
default = False,
231234
help = 'Restrict promotion to ConstantBuffer (weights/biases) only.\n')
235+
parser.add_argument('--promoteStrategy',
236+
type = str,
237+
default = "greedy-score",
238+
choices = ["greedy-score", "knapsack-ratio", "random", "largest", "smallest"],
239+
help = 'Selection strategy for tensor promotion.\n')
240+
parser.add_argument('--promoteSeed', type = int, default = 0, help = 'RNG seed for --promoteStrategy=random.\n')
241+
parser.add_argument('--promoteMetricsPath',
242+
type = str,
243+
default = None,
244+
help = 'If set, write JSON metrics for the promotion decision to this path.\n')
232245
parser.add_argument(
233246
"--cores",
234247
type = int,

0 commit comments

Comments
 (0)