Skip to content

Commit f03b8d0

Browse files
committed
Calculate dynamic and static buffer sizes for non tiling platforms
1 parent c4e4189 commit f03b8d0

6 files changed

Lines changed: 165 additions & 90 deletions

File tree

Deeploy/CommonExtensions/CodeTransformationPasses/MemoryAllocation.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,13 +142,31 @@ def apply(self,
142142
for buffer in reversed(self.topologicallySortBuffers(outputs + transients)):
143143
assert buffer._live == False, f"Tried to allocate already live buffer {buffer.name}"
144144
buffer._live = True
145+
146+
memoryLevel = "None" if not hasattr(buffer, "_memoryLevel") else buffer._memoryLevel
147+
if memoryLevel not in ctxt._dynamicSize:
148+
ctxt._dynamicSize[memoryLevel] = int(buffer.sizeInBytes())
149+
else:
150+
ctxt._dynamicSize[memoryLevel] += int(buffer.sizeInBytes())
151+
145152
executionBlock.addLeft(buffer.allocTemplate, buffer._bufferRepresentation())
146153

154+
for levels in ctxt._dynamicSize.keys():
155+
if levels not in ctxt._maxDynamicSize:
156+
ctxt._maxDynamicSize[levels] = max(0, ctxt._dynamicSize[levels])
157+
else:
158+
ctxt._maxDynamicSize[levels] = max(ctxt._maxDynamicSize.get(levels, 0), ctxt._dynamicSize[levels])
159+
147160
for buffer in inputs + transients:
148161
assert buffer._live == True, f"Tried to deallocate already dead buffer {buffer.name}"
149162
buffer._live = False
150163
# Don't deallocate if it's an alias of a live buffer
151164
if not buffer.has_live_ancestors(ctxt = ctxt):
165+
memoryLevel = "None" if not hasattr(buffer, "_memoryLevel") else buffer._memoryLevel
166+
if memoryLevel not in ctxt._dynamicSize:
167+
ctxt._dynamicSize[memoryLevel] = 0
168+
else:
169+
ctxt._dynamicSize[memoryLevel] -= int(buffer.sizeInBytes())
152170
executionBlock.addRight(buffer.deallocTemplate, buffer._bufferRepresentation())
153171

154172
return ctxt, executionBlock
@@ -177,10 +195,30 @@ def apply(self,
177195

178196
for buffer in outputs + transients:
179197
assert buffer._live == False, f"Tried to allocate already live buffer {buffer.name}"
198+
199+
memoryLevel = "None" if not hasattr(buffer, "_memoryLevel") else buffer._memoryLevel
200+
if memoryLevel not in ctxt._dynamicSize:
201+
ctxt._dynamicSize[memoryLevel] = int(buffer.sizeInBytes())
202+
else:
203+
ctxt._dynamicSize[memoryLevel] += int(buffer.sizeInBytes())
204+
180205
buffer._live = True
181206

207+
for levels in ctxt._dynamicSize.keys():
208+
if levels not in ctxt._maxDynamicSize:
209+
ctxt._maxDynamicSize[levels] = max(0, ctxt._dynamicSize[levels])
210+
else:
211+
ctxt._maxDynamicSize[levels] = max(ctxt._maxDynamicSize.get(levels, 0), ctxt._dynamicSize[levels])
212+
182213
for buffer in inputs + transients:
183214
assert buffer._live == True, f"Tried to deallocate already dead buffer {buffer.name}"
215+
216+
memoryLevel = "None" if not hasattr(buffer, "_memoryLevel") else buffer._memoryLevel
217+
if memoryLevel not in ctxt._dynamicSize:
218+
ctxt._dynamicSize[memoryLevel] = 0
219+
else:
220+
ctxt._dynamicSize[memoryLevel] -= int(buffer.sizeInBytes())
221+
184222
buffer._live = False
185223

186224
return ctxt, executionBlock

Deeploy/CommonExtensions/NetworkDeployers/NetworkDeployerWrapper.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,3 +96,9 @@ def generateBufferAllocationCode(self) -> str:
9696
# MultiEngineDeployer augment
9797
def _mapNode(self, node: gs.Node) -> Union[ONNXLayer, Any]:
9898
return self._innerObject._mapNode(node)
99+
100+
def _printMemorySummary(self):
    """Forward the memory summary request to the wrapped deployer

    Returns
    -------
    Any
        Whatever the wrapped deployer's ``_printMemorySummary`` returns

    """
    wrappedDeployer = self._innerObject
    return wrappedDeployer._printMemorySummary()
102+
103+
def _printInputOutputSummary(self):
    """Forward the input/output summary request to the wrapped deployer

    Returns
    -------
    Any
        Whatever the wrapped deployer's ``_printInputOutputSummary`` returns

    """
    wrappedDeployer = self._innerObject
    return wrappedDeployer._printInputOutputSummary()

Deeploy/CommonExtensions/NetworkDeployers/SignPropDeployer.py

Lines changed: 4 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
import onnx_graphsurgeon as gs
2929

3030
from Deeploy.AbstractDataTypes import Pointer
31-
from Deeploy.DeeployTypes import CodeGenVerbosity, DeploymentPlatform, NetworkDeployer, TopologyOptimizer, _NoVerbosity
31+
from Deeploy.DeeployTypes import DeploymentPlatform, NetworkDeployer, TopologyOptimizer
3232
from Deeploy.Logging import DEFAULT_LOGGER as log
3333

3434

@@ -64,35 +64,15 @@ def _createIOBindings(self, ctxt, graph):
6464

6565
return ctxt
6666

67-
def generateFunction(self, verbose: CodeGenVerbosity = _NoVerbosity) -> str:
68-
"""Helper function to prepare deployment and return generated function code
69-
70-
"""
71-
72-
if not self.prepared:
73-
self.prepare(verbose = verbose)
74-
75-
log.info("=" * 80)
76-
log.info("Deeploy Code Generation")
77-
log.info("=" * 80)
78-
67+
def _printInputOutputSummary(self):
7968
log.info('Input:')
80-
for name in self.inputTypes.keys():
81-
buf = self.ctxt.lookup(name)
69+
for buf in self.inputs():
8270
log.info(
83-
f" - '{name}': Type: {buf._type.referencedType.typeName}, nLevels: {buf.nLevels}, Signed: {buf._signed}, Offset: {self.inputOffsets[name]}"
71+
f" - '{buf.name}': Type: {buf._type.referencedType.typeName}, nLevels: {buf.nLevels}, Signed: {buf._signed}, Offset: {self.inputOffsets[buf.name]}"
8472
)
8573

8674
log.info('Output:')
8775
for buf in self.outputs():
8876
log.info(
8977
f" - '{buf.name}': Type: {buf._type.referencedType.typeName}, nLevels: {buf.nLevels}, Signed: {buf._signed}"
9078
)
91-
92-
log.info("-" * 80)
93-
num_ops = self.numberOfOps(verbose = True)
94-
log.info("-" * 80)
95-
log.info(f"Number of Ops. : {num_ops}")
96-
log.info(f"Model Parameters : {self.getParameterSize()}")
97-
98-
return self.generateInferenceCode()

Deeploy/DeeployTypes.py

Lines changed: 56 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,17 @@ def has_live_ancestors(self, ctxt: NetworkContext) -> bool:
399399

400400
return False
401401

402+
def sizeInBytes(self) -> int:
    """Returns the size of this VariableBuffer in bytes

    The total bit count (number of elements times the ``typeWidth`` of the
    referenced type) is computed first and only then floor-divided by 8.
    Dividing the type width by 8 before multiplying would truncate element
    types narrower than one byte to a size of zero.

    Returns
    -------
    int
        Size of this VariableBuffer in bytes

    """
    numElements = int(np.prod(self.shape))
    totalBits = numElements * self._type.referencedType.typeWidth
    return int(totalBits // 8)
412+
402413

403414
class TransientBuffer(VariableBuffer):
404415
"""Class to represent memory space required by kernels that is not covered by input and output tensors, e.g. im2col buffers in convolutions
@@ -445,6 +456,9 @@ def __repr__(self) -> str:
445456
def fromVariableBuffer(cls, buffer: VariableBuffer):
446457
ret = cls(name = buffer.name, size = np.prod(buffer.shape) * buffer._type.typeWidth // 8)
447458

459+
def sizeInBytes(self) -> int:
    """Returns the size of this TransientBuffer in bytes

    Returns
    -------
    int
        The buffer's ``size`` attribute converted to a plain Python int

    """
    byteCount = self.size
    return int(byteCount)
461+
448462

449463
class ConstantBuffer(VariableBuffer):
450464
"""Class to represent compile-time constant tensors (weights, biases, other parameters) within Deeploy.
@@ -587,6 +601,9 @@ def __init__(self,
587601
self.TransientBuffer = transientBuffer
588602
self.name = name
589603

604+
self._maxDynamicSize = {} #: dict: Per memory level, maximum dynamic memory size in bytes occupied by live buffers at any point in time
605+
self._dynamicSize = {} #: dict: Per memory level, current dynamic memory size in bytes occupied by live buffers
606+
590607
def dealiasBuffer(self, name: str) -> str:
591608
"""Function to find the underlying aliased VariableBuffer
592609
@@ -2965,8 +2982,7 @@ def generateIOBufferInitializationCode(self) -> str:
29652982
def worstCaseBufferSize(self):
29662983
"""Return the worst-case buffer size occupied by the network implementation
29672984
"""
2968-
# WIESEP: There is no reasonable value for a worst case buffer size without tiling
2969-
raise NotImplementedError("Worst case buffer size is not known or not implemented!")
2985+
return self.ctxt._maxDynamicSize
29702986

29712987
# Don't override this
29722988
def generateBufferInitializationCode(self) -> str:
@@ -3116,54 +3132,6 @@ def generateEngineInitializationCode(self) -> str:
31163132
"""
31173133
return ("\n").join([engine.initCode for engine in self.Platform.engines])
31183134

3119-
# Don't override this - Returns parameter size in bytes
3120-
def getParameterSize(self) -> int:
3121-
"""Return the BYTE size of all static network parameters (weights, biases, parameters,...)
3122-
3123-
Returns
3124-
-------
3125-
int
3126-
Size of all network parameters
3127-
3128-
Raises
3129-
------
3130-
RuntimeError
3131-
Raises a RuntimeError if network is not parsed and bound
3132-
3133-
3134-
"""
3135-
if not self.parsed or not self.bound:
3136-
raise RuntimeError('You need to parse and bind the network before getting RAM Size!')
3137-
3138-
size = 0
3139-
for _buffer in self.ctxt.globalObjects.values():
3140-
# We do not count structs for now, since they are not properly modeled
3141-
if isinstance(_buffer, ConstantBuffer) and _buffer._deploy:
3142-
size += int((np.prod(_buffer.shape) * _buffer._type.typeWidth // 8))
3143-
3144-
return size
3145-
3146-
# Don't override this - Returns worst case layer and buffering size in bytes
3147-
def getTotalSize(self) -> int:
3148-
"""Returns total size of the network, consisting of all parameters and intermediate buffer size
3149-
3150-
Returns
3151-
-------
3152-
int
3153-
Total network size
3154-
3155-
Raises
3156-
------
3157-
RuntimeError
3158-
Raises a RuntimeError if network is not parsed and bound
3159-
3160-
3161-
"""
3162-
if not self.parsed or not self.bound:
3163-
raise RuntimeError('You need to parse and bind the network before getting RAM Size!')
3164-
3165-
return self.getParameterSize() + self.worstCaseBufferSize
3166-
31673135
def numberOfOps(self, verbose: bool) -> int:
31683136
"""Returns the total number of operations per network inference
31693137
@@ -3584,6 +3552,41 @@ def prepare(self, verbose: CodeGenVerbosity = _NoVerbosity):
35843552
self.backEnd(verbose = verbose)
35853553
self.prepared = True
35863554

3555+
def _printInputOutputSummary(self):
    """Log the name and referenced type name of every network input and output buffer

    The inputs are listed under an ``Input:`` heading, followed by the outputs
    under an ``Output:`` heading.

    """
    sections = (("Input:", self.inputs), ("Output:", self.outputs))

    for heading, getBuffers in sections:
        log.info(heading)
        # Buffers are fetched only after the heading has been logged
        for ioBuffer in getBuffers():
            bufferName = ioBuffer.name
            typeName = ioBuffer._type.referencedType.typeName
            log.info(f" - '{bufferName}': Type: {typeName}")
3563+
3564+
def _printMemorySummary(self):
    """Log a per-memory-level report of static and worst-case dynamic memory usage

    One row is logged for every level in ``self.worstCaseBufferSize``. If that
    mapping is empty, a single ``"None"`` row with a dynamic size of zero is
    reported instead.

    A global buffer contributes to the static size of a level if its
    ``_memoryLevel`` equals that level. The ``"None"`` row is a catch-all and
    counts every static buffer, annotated or not.

    A warning is logged once for each static buffer that no reported level
    accounts for. This can only happen when there is no ``"None"`` row.

    """
    log.info("")
    log.info("Memory Usage Report:")
    log.info("Level Total (bytes) (Static + Dynamic) ")
    log.info("-" * 80)

    _worstCaseBufferSize = self.worstCaseBufferSize
    if len(_worstCaseBufferSize) == 0:
        _worstCaseBufferSize = {"None": 0}

    # The set of static buffers does not depend on the level, so collect it once.
    # We do not count structs for now, since they are not properly modeled
    staticBuffers = [
        _buffer for _buffer in self.ctxt.globalObjects.values()
        if isinstance(_buffer, ConstantBuffer) or (isinstance(_buffer, VariableBuffer) and _buffer._deploy)
    ]

    # Warn once per unaccounted buffer instead of once per (buffer, level) mismatch;
    # a buffer living in another reported level is not an error.
    if "None" not in _worstCaseBufferSize:
        for _buffer in staticBuffers:
            if not (hasattr(_buffer, "_memoryLevel") and _buffer._memoryLevel in _worstCaseBufferSize):
                log.warning(f"Buffer {_buffer.name} does not have a valid memory level")

    for level, dynamicSize in _worstCaseBufferSize.items():
        staticSize = sum(
            int((np.prod(_buffer.shape) * _buffer._type.referencedType.typeWidth // 8))
            for _buffer in staticBuffers
            if level == "None" or (hasattr(_buffer, "_memoryLevel") and _buffer._memoryLevel == level))

        total = staticSize + dynamicSize

        log.info(f"{level:<22} {total:8,d} "
                 f"({staticSize:6,d} + {dynamicSize:7,d}) ")
3589+
35873590
def generateFunction(self, verbose: CodeGenVerbosity = _NoVerbosity) -> str:
35883591
"""Helper function to prepare deployment and return generated function code
35893592
@@ -3596,19 +3599,13 @@ def generateFunction(self, verbose: CodeGenVerbosity = _NoVerbosity) -> str:
35963599
log.info("Deeploy Code Generation")
35973600
log.info("=" * 80)
35983601

3599-
log.info('Input:')
3600-
for name in self.inputTypes.keys():
3601-
buf = self.ctxt.lookup(name)
3602-
log.info(f" - '{name}': Type: {buf._type.referencedType.typeName}")
3603-
3604-
log.info('Output:')
3605-
for buf in self.outputs():
3606-
log.info(f" - '{buf.name}': Type: {buf._type.referencedType.typeName}")
3602+
self._printInputOutputSummary()
36073603

36083604
num_ops = self.numberOfOps(verbose = True)
36093605
log.info("-" * 80)
36103606

36113607
log.info(f"Number of Ops. : {num_ops}")
3612-
log.info(f"Model Parameters : {self.getParameterSize()}")
3608+
3609+
self._printMemorySummary()
36133610

36143611
return self.generateInferenceCode()

Deeploy/MemoryLevelExtension/NetworkDeployers/MemoryLevelDeployer.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from types import MappingProxyType
2929
from typing import Any, Callable, Dict, List, Tuple, Type, Union
3030

31+
import numpy as np
3132
import onnx_graphsurgeon as gs
3233

3334
from Deeploy.AbstractDataTypes import Pointer
@@ -149,6 +150,25 @@ def codeTransform(self, verbose: CodeGenVerbosity = _NoVerbosity):
149150
self.ctxt, self.graph = self.memoryLevelAnnotationOptimizer.optimize(self.ctxt, self.graph)
150151
super().codeTransform(verbose)
151152

153+
def _printMemorySummary(self):
    """Log capacity, static and dynamic usage, and utilisation for every memory level in use

    One row is logged per entry of ``self.worstCaseBufferSize``. The static
    part sums the deployed ConstantBuffers among the global objects whose
    memory level matches the row. Constants without a ``_memoryLevel``
    attribute are attributed to the level ``"None"``.

    A level that is missing from the platform's memory hierarchy is reported
    with ``n/a`` as capacity and usage. A level with zero capacity is reported
    with ``n/a`` as usage. Neither case aborts the report.

    """
    log.info("Memory Usage Report:")
    log.info(f"{'Level':<22} {'Capacity (bytes)':>16} {'Total':>8} {'(Static + Dynamic)':<21} {'Usage':<6}")
    log.info("-" * 80)

    # The set of deployed constants does not depend on the level, so collect it once.
    # We do not count structs for now, since they are not properly modeled
    deployedConstants = [
        _buffer for _buffer in self.ctxt.globalObjects.values()
        if isinstance(_buffer, ConstantBuffer) and _buffer._deploy
    ]

    for level, dynamicSize in self.worstCaseBufferSize.items():
        staticSize = sum(
            int((np.prod(_buffer.shape) * _buffer._type.referencedType.typeWidth // 8))
            for _buffer in deployedConstants
            if getattr(_buffer, "_memoryLevel", "None") == level)
        total = staticSize + dynamicSize

        # A reported level (e.g. "None") need not exist in the memory hierarchy
        try:
            capacity = self.Platform.memoryHierarchy.memoryLevels[level].size
        except KeyError:
            capacity = None

        capacityColumn = f"{'n/a':>16}" if capacity is None else f"{capacity:16,}"
        # Guard the division: capacity may be unknown or zero
        usageColumn = f"({total / capacity * 100:5.1f}%)" if capacity else "(  n/a)"

        log.info(f"{level:<22} {capacityColumn} {total:8,d} "
                 f"({staticSize:6,d} + {dynamicSize:7,d}) "
                 f"{usageColumn}")
171+
152172

153173
class MemoryLevelAwareSignPropDeployer(SignPropDeployer):
154174

@@ -206,6 +226,25 @@ def codeTransform(self, verbose: CodeGenVerbosity = _NoVerbosity):
206226
self.ctxt, self.graph = self.memoryLevelAnnotationOptimizer.optimize(self.ctxt, self.graph)
207227
super().codeTransform(verbose)
208228

229+
def _printMemorySummary(self):
    """Log capacity, static and dynamic usage, and utilisation for every memory level in use

    One row is logged per entry of ``self.worstCaseBufferSize``. The static
    part sums the deployed ConstantBuffers among the global objects whose
    memory level matches the row. Constants without a ``_memoryLevel``
    attribute are attributed to the level ``"None"``.

    A level that is missing from the platform's memory hierarchy is reported
    with ``n/a`` as capacity and usage. A level with zero capacity is reported
    with ``n/a`` as usage. Neither case aborts the report.

    """
    log.info("Memory Usage Report:")
    log.info(f"{'Level':<22} {'Capacity (bytes)':>16} {'Total':>8} {'(Static + Dynamic)':<21} {'Usage':<6}")
    log.info("-" * 80)

    # The set of deployed constants does not depend on the level, so collect it once.
    # We do not count structs for now, since they are not properly modeled
    deployedConstants = [
        _buffer for _buffer in self.ctxt.globalObjects.values()
        if isinstance(_buffer, ConstantBuffer) and _buffer._deploy
    ]

    for level, dynamicSize in self.worstCaseBufferSize.items():
        staticSize = sum(
            int((np.prod(_buffer.shape) * _buffer._type.referencedType.typeWidth // 8))
            for _buffer in deployedConstants
            if getattr(_buffer, "_memoryLevel", "None") == level)
        total = staticSize + dynamicSize

        # A reported level (e.g. "None") need not exist in the memory hierarchy
        try:
            capacity = self.Platform.memoryHierarchy.memoryLevels[level].size
        except KeyError:
            capacity = None

        capacityColumn = f"{'n/a':>16}" if capacity is None else f"{capacity:16,}"
        # Guard the division: capacity may be unknown or zero
        usageColumn = f"({total / capacity * 100:5.1f}%)" if capacity else "(  n/a)"

        log.info(f"{level:<22} {capacityColumn} {total:8,d} "
                 f"({staticSize:6,d} + {dynamicSize:7,d}) "
                 f"{usageColumn}")
247+
209248

210249
class MemoryDeployerWrapper(NetworkDeployerWrapper):
211250

@@ -253,3 +292,22 @@ def bind(self):
253292
def codeTransform(self, verbose: CodeGenVerbosity = _NoVerbosity):
254293
self.ctxt, self.graph = self.memoryLevelAnnotationOptimizer.optimize(self.ctxt, self.graph)
255294
super().codeTransform(verbose)
295+
296+
def _printMemorySummary(self):
    """Log capacity, static and dynamic usage, and utilisation for every memory level in use

    One row is logged per entry of ``self.worstCaseBufferSize``. The static
    part sums the deployed ConstantBuffers among the global objects whose
    memory level matches the row. Constants without a ``_memoryLevel``
    attribute are attributed to the level ``"None"``.

    A level that is missing from the platform's memory hierarchy is reported
    with ``n/a`` as capacity and usage. A level with zero capacity is reported
    with ``n/a`` as usage. Neither case aborts the report.

    """
    log.info("Memory Usage Report:")
    log.info(f"{'Level':<22} {'Capacity (bytes)':>16} {'Total':>8} {'(Static + Dynamic)':<21} {'Usage':<6}")
    log.info("-" * 80)

    # The set of deployed constants does not depend on the level, so collect it once.
    # We do not count structs for now, since they are not properly modeled
    deployedConstants = [
        _buffer for _buffer in self.ctxt.globalObjects.values()
        if isinstance(_buffer, ConstantBuffer) and _buffer._deploy
    ]

    for level, dynamicSize in self.worstCaseBufferSize.items():
        staticSize = sum(
            int((np.prod(_buffer.shape) * _buffer._type.referencedType.typeWidth // 8))
            for _buffer in deployedConstants
            if getattr(_buffer, "_memoryLevel", "None") == level)
        total = staticSize + dynamicSize

        # A reported level (e.g. "None") need not exist in the memory hierarchy
        try:
            capacity = self.Platform.memoryHierarchy.memoryLevels[level].size
        except KeyError:
            capacity = None

        capacityColumn = f"{'n/a':>16}" if capacity is None else f"{capacity:16,}"
        # Guard the division: capacity may be unknown or zero
        usageColumn = f"({total / capacity * 100:5.1f}%)" if capacity else "(  n/a)"

        log.info(f"{level:<22} {capacityColumn} {total:8,d} "
                 f"({staticSize:6,d} + {dynamicSize:7,d}) "
                 f"{usageColumn}")

0 commit comments

Comments
 (0)