22#
33# SPDX-License-Identifier: Apache-2.0
44
5- from typing import List , Tuple
5+ import json
6+ import os
7+ import random
8+ from typing import List , Optional , Tuple
69
710import numpy as np
811import onnx_graphsurgeon as gs
1215from Deeploy .Logging import DEFAULT_LOGGER as log
1316from Deeploy .MemoryLevelExtension .MemoryLevels import MemoryHierarchy
1417
18+ PROMOTE_STRATEGIES = ("greedy-score" , "knapsack-ratio" , "random" , "largest" , "smallest" )
19+
1520
1621class AnnotateDefaultMemoryLevel (SequentialPass ):
1722
@@ -59,15 +64,21 @@ def _bufferSizeBytes(buffer: VariableBuffer) -> int:
5964
6065class PromoteTensorsToL2Greedy (SequentialPass ):
6166 """Promote global tensors from a slower memory level (e.g. L3) to L2 by
62- greedy score = reuse_count * size_bytes , subject to an L2 byte budget.
67+ one of several selection strategies , subject to an L2 byte budget.
6368
64- The pass walks `ctxt.globalObjects`, considers any buffer whose current
65- `_memoryLevel` matches `sourceLevel`, ranks them by descending score, and
66- flips `_memoryLevel` to `targetLevel` while accumulated bytes fit in
67- (targetLevel.size - headroomBytes - already-occupied-target-bytes).
69+ Strategies (`strategy=`):
70+ - greedy-score : sort by -(reuse * size). Bias toward high-traffic items.
71+ - knapsack-ratio : sort by -reuse (= -value/weight). Standard 0/1 knapsack
72+ greedy heuristic; favors small high-reuse tensors.
73+ - random : shuffle deterministically with `seed`.
74+ - largest : sort by -size.
75+ - smallest : sort by +size (pack many small items).
6876
6977 The downstream MiniMalloc / arena allocator is the ground-truth fit
7078 check; this pass is only a heuristic placement decision.
79+
80+ If `metricsPath` is given, a JSON summary of the decision (per-tensor
81+ decisions, totals) is written there.
7182 """
7283
7384 def __init__ (self ,
@@ -76,14 +87,42 @@ def __init__(self,
7687 targetLevel : str = "L2" ,
7788 headroomBytes : int = 64000 ,
7889 minReuse : int = 1 ,
79- onlyConstants : bool = False ):
90+ onlyConstants : bool = False ,
91+ strategy : str = "greedy-score" ,
92+ seed : int = 0 ,
93+ metricsPath : Optional [str ] = None ):
8094 super ().__init__ ()
95+ if strategy not in PROMOTE_STRATEGIES :
96+ raise ValueError (f"unknown promote strategy { strategy !r} ; expected one of { PROMOTE_STRATEGIES } " )
8197 self .memoryHierarchy = memoryHierarchy
8298 self .sourceLevel = sourceLevel
8399 self .targetLevel = targetLevel
84100 self .headroomBytes = headroomBytes
85101 self .minReuse = minReuse
86102 self .onlyConstants = onlyConstants
103+ self .strategy = strategy
104+ self .seed = seed
105+ self .metricsPath = metricsPath
106+ # Accumulated decisions across all apply() calls within one process.
107+ # Same tensor seen in later calls keeps its first-seen decision.
108+ self ._decisions : dict = {}
109+ self ._invocations : int = 0
110+
111+ def _orderCandidates (self , candidates : List [Tuple [int , int , int , str , VariableBuffer ]]) -> List :
112+ # candidate tuple = (score, size, reuse, name, buf)
113+ if self .strategy == "greedy-score" :
114+ return sorted (candidates , key = lambda x : (- x [0 ], x [3 ]))
115+ if self .strategy == "knapsack-ratio" :
116+ return sorted (candidates , key = lambda x : (- x [2 ], x [3 ]))
117+ if self .strategy == "largest" :
118+ return sorted (candidates , key = lambda x : (- x [1 ], x [3 ]))
119+ if self .strategy == "smallest" :
120+ return sorted (candidates , key = lambda x : (x [1 ], x [3 ]))
121+ if self .strategy == "random" :
122+ ordered = list (candidates )
123+ random .Random (self .seed ).shuffle (ordered )
124+ return ordered
125+ raise AssertionError (f"unhandled strategy { self .strategy } " )
87126
88127 def apply (self , ctxt : NetworkContext , graph : gs .Graph ) -> Tuple [NetworkContext , gs .Graph ]:
89128 target = self .memoryHierarchy .memoryLevels .get (self .targetLevel )
@@ -103,7 +142,7 @@ def apply(self, ctxt: NetworkContext, graph: gs.Graph) -> Tuple[NetworkContext,
103142 used += _bufferSizeBytes (buf )
104143
105144 from Deeploy .DeeployTypes import ConstantBuffer
106- candidates : List [Tuple [int , int , str , VariableBuffer ]] = []
145+ candidates : List [Tuple [int , int , int , str , VariableBuffer ]] = []
107146 for name , buf in ctxt .globalObjects .items ():
108147 if not isinstance (buf , VariableBuffer ):
109148 continue
@@ -122,26 +161,83 @@ def apply(self, ctxt: NetworkContext, graph: gs.Graph) -> Tuple[NetworkContext,
122161 if size <= 0 :
123162 continue
124163 score = reuse * size
125- candidates .append ((score , size , name , buf ))
164+ candidates .append ((score , size , reuse , name , buf ))
126165
127- candidates . sort ( key = lambda x : ( - x [ 0 ], x [ 2 ]) )
166+ ordered = self . _orderCandidates ( candidates )
128167
129- promoted : List [Tuple [str , int ]] = []
130- skipped : List [Tuple [str , int ]] = []
131- for score , size , name , buf in candidates :
168+ promoted : List [Tuple [str , int , int , int ]] = []
169+ skipped : List [Tuple [str , int , int , int ]] = []
170+ for score , size , reuse , name , buf in ordered :
132171 if used + size <= budget :
133172 buf ._memoryLevel = self .targetLevel
134173 used += size
135- promoted .append ((name , size ))
174+ promoted .append ((name , size , reuse , score ))
136175 else :
137- skipped .append ((name , size ))
138-
139- log .info (f"[PromoteTensorsToL2Greedy] { self .sourceLevel } ->{ self .targetLevel } : "
140- f"promoted { len (promoted )} tensor(s), { used } /{ budget } bytes used "
141- f"(headroom { self .headroomBytes } ), { len (skipped )} skipped for capacity" )
142- for name , size in promoted :
143- log .debug (f" + promote { name } ({ size } B) -> { self .targetLevel } " )
144- for name , size in skipped :
145- log .debug (f" - skip { name } ({ size } B) stays at { self .sourceLevel } " )
176+ skipped .append ((name , size , reuse , score ))
177+
178+ self ._invocations += 1
179+ for name , size , reuse , score in promoted :
180+ self ._decisions .setdefault (name , {
181+ "name" : name ,
182+ "size" : size ,
183+ "reuse" : reuse ,
184+ "score" : score ,
185+ "decision" : "promoted" ,
186+ })
187+ for name , size , reuse , score in skipped :
188+ self ._decisions .setdefault (name , {
189+ "name" : name ,
190+ "size" : size ,
191+ "reuse" : reuse ,
192+ "score" : score ,
193+ "decision" : "skipped" ,
194+ })
195+
196+ dmaSaved = sum (s for _ , _ , _ , s in promoted )
197+ totalCandidates = len (candidates )
198+ totalCandidateBytes = sum (c [1 ] for c in candidates )
199+ totalCandidateScore = sum (c [0 ] for c in candidates )
200+
201+ log .info (f"[PromoteTensorsToL2Greedy] strategy={ self .strategy } seed={ self .seed } "
202+ f"{ self .sourceLevel } ->{ self .targetLevel } call#{ self ._invocations } : "
203+ f"{ len (promoted )} /{ totalCandidates } promoted, "
204+ f"{ used } /{ budget } B used (headroom { self .headroomBytes } ), "
205+ f"dma_saved={ dmaSaved } (of total { totalCandidateScore } )" )
206+
207+ if self .metricsPath :
208+ try :
209+ os .makedirs (os .path .dirname (self .metricsPath ), exist_ok = True )
210+ except (FileNotFoundError , OSError ):
211+ pass
212+ promotedList = [d for d in self ._decisions .values () if d ["decision" ] == "promoted" ]
213+ skippedList = [d for d in self ._decisions .values () if d ["decision" ] == "skipped" ]
214+ cumulativeBytes = sum (d ["size" ] for d in promotedList )
215+ cumulativeScore = sum (d ["score" ] for d in promotedList )
216+ allBytes = sum (d ["size" ] for d in self ._decisions .values ())
217+ allScore = sum (d ["score" ] for d in self ._decisions .values ())
218+ payload = {
219+ "strategy" : self .strategy ,
220+ "seed" : self .seed ,
221+ "source_level" : self .sourceLevel ,
222+ "target_level" : self .targetLevel ,
223+ "target_capacity" : target .size ,
224+ "headroom_bytes" : self .headroomBytes ,
225+ "budget" : budget ,
226+ "bytes_used_in_target" : used ,
227+ "invocations" : self ._invocations ,
228+ "n_candidates" : len (self ._decisions ),
229+ "candidates_total_bytes" : allBytes ,
230+ "candidates_total_score" : allScore ,
231+ "n_promoted" : len (promotedList ),
232+ "bytes_promoted" : cumulativeBytes ,
233+ "dma_saved" : cumulativeScore ,
234+ "promoted" : promotedList ,
235+ "skipped" : skippedList ,
236+ }
237+ try :
238+ with open (self .metricsPath , "w" ) as fh :
239+ json .dump (payload , fh , indent = 2 )
240+ except OSError as exc :
241+ log .warning (f"[PromoteTensorsToL2Greedy] could not write metrics to { self .metricsPath } : { exc } " )
146242
147243 return ctxt , graph
0 commit comments