44
55from typing import List , Tuple
66
7- import numpy as np
87import onnx_graphsurgeon as gs
98
109from Deeploy .CommonExtensions .OptimizationPasses .PassClasses import SequentialPass
11- from Deeploy .DeeployTypes import ConstantBuffer , NetworkContext , VariableBuffer
12- from Deeploy .MemoryLevelExtension .MemoryLevels import MemoryHierarchy , MemoryLevel
10+ from Deeploy .DeeployTypes import NetworkContext , VariableBuffer
11+ from Deeploy .MemoryLevelExtension .MemoryLevels import MemoryHierarchy
1312
1413
1514class AnnotateDefaultMemoryLevel (SequentialPass ):
@@ -19,15 +18,9 @@ def __init__(self, memoryHierarchy: MemoryHierarchy):
1918 self .memoryHierarchy = memoryHierarchy
2019
def apply(self, ctxt: NetworkContext, graph: gs.Graph) -> Tuple[NetworkContext, gs.Graph]:
    """Tag every buffer that has no ``_memoryLevel`` yet with the default level.

    The buffers visited are the values of ``ctxt.localObjects`` merged with
    ``ctxt.globalObjects``; on a name clash the global entry replaces the
    local one. Buffers that already carry a ``_memoryLevel`` attribute are
    left untouched.

    Args:
        ctxt: Network context whose local and global objects are annotated
            in place.
        graph: Graph being processed; returned unchanged.

    Returns:
        The same ``(ctxt, graph)`` pair that was passed in.
    """
    # The default level does not depend on the buffer, so look it up once
    # instead of once per un-annotated buffer.
    defaultLevel = self.memoryHierarchy.getDefaultMemoryLevel().name
    for _buffer in {**ctxt.localObjects, **ctxt.globalObjects}.values():
        if not hasattr(_buffer, "_memoryLevel"):
            _buffer._memoryLevel = defaultLevel
    return ctxt, graph
3225
3326
@@ -53,37 +46,4 @@ def globalBuffers(tensors: List[gs.Tensor]) -> List[VariableBuffer]:
5346 for _buffer in buffers :
5447 _buffer ._memoryLevel = self .ioLevel
5548
56- return ctxt , graph
57-
58-
class AnnotateNeurekaWeightMemoryLevel(SequentialPass):
    """Place convolution weights of one engine into a dedicated weight memory level.

    Weights are assigned greedily, in graph node order, for as long as the
    running occupancy stays strictly below the size of the weight memory level.
    """

    def __init__(self, neurekaEngineName: str, weightMemoryLevel: MemoryLevel):
        """Store the engine name to match and the target weight memory level.

        Args:
            neurekaEngineName: Value compared against each node's ``"engine"``
                attribute to select the nodes to process.
            weightMemoryLevel: Memory level whose ``name`` is written onto the
                selected weight buffers and whose ``size`` bounds the total.
        """
        self._weightMemoryLevel = weightMemoryLevel
        self.neurekaEngineName = neurekaEngineName
        super().__init__()

    def apply(self, ctxt: NetworkContext, graph: gs.Graph) -> Tuple[NetworkContext, gs.Graph]:
        """Annotate eligible weight buffers with the weight memory level.

        Args:
            ctxt: Network context holding the buffers; annotated in place.
            graph: Graph whose nodes are scanned; returned unchanged.

        Returns:
            The same ``(ctxt, graph)`` pair that was passed in.
        """

        def _neurekaWeightBufferSize(buffer: ConstantBuffer) -> int:
            # Weights are encoded as bytes so no need to check for typeWidth
            return int(np.prod(buffer.shape))

        weightLevelName = self._weightMemoryLevel.name

        def _alreadyInWeightMemory(buffer) -> bool:
            return hasattr(buffer, "_memoryLevel") and buffer._memoryLevel == weightLevelName

        # Current weight memory occupation: everything already annotated with
        # the weight memory level before this pass touches anything.
        weightMemoryOccupation = sum(
            _neurekaWeightBufferSize(buffer)
            for buffer in {**ctxt.globalObjects, **ctxt.localObjects}.values()
            if _alreadyInWeightMemory(buffer))

        neurekaNodes = [node for node in graph.nodes if node.attrs["engine"] == self.neurekaEngineName]
        for node in neurekaNodes:
            if node.op not in ["Conv", "RequantizedConv"]:
                continue

            weightName = node.inputs[1].name
            if not (ctxt.is_local(weightName) or ctxt.is_global(weightName)):
                continue

            buffer = ctxt.lookup(weightName)

            # A buffer that already lives in weight memory (pre-annotated, or
            # shared with a node handled earlier in this loop) is already part
            # of the occupancy figure; adding it again would over-count and
            # needlessly keep later weights out.
            if _alreadyInWeightMemory(buffer):
                continue

            bufferSize = _neurekaWeightBufferSize(buffer)
            if weightMemoryOccupation + bufferSize < self._weightMemoryLevel.size:
                buffer._memoryLevel = weightLevelName
                weightMemoryOccupation += bufferSize
        return ctxt, graph
49+ return ctxt , graph
0 commit comments