Skip to content

Commit e536374

Browse files
Improved Memory Visualization and Multi-Layer Tiling Profiling (pulp-platform#56)
* Improve memory alloc visualization * Multi-level profiling + Linting * profiling string change to const static * Fix profiling dual loop issue * Fix README Status Badges * Update CHANGELOG * Align comment and type hint * Refactor profiling methods in TilingPrototype --------- Co-authored-by: Run Wang <52746141+SamanthaWangdl@users.noreply.github.com>
1 parent 7f31f19 commit e536374

16 files changed

Lines changed: 301 additions & 241 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ This release containing major architectural changes, new platform support, enhan
77
### List of Pull Requests
88
- Move PULP SDK to main branch/fork [#88](https://github.com/pulp-platform/Deeploy/pull/88)
99
- Finite Lifetime for IO Tensors [#51](https://github.com/pulp-platform/Deeploy/pull/51)
10+
- Improved Memory Visualization and Multi-Layer Tiling Profiling [#56](https://github.com/pulp-platform/Deeploy/pull/56)
1011
- Fix Linting in CI and Reformat C Files [#86](https://github.com/pulp-platform/Deeploy/pull/86)
1112
- Fix Broken CMake Flow For pulp-sdk [#87](https://github.com/pulp-platform/Deeploy/pull/87)
1213
- Refactor Changelog For Release [#85](https://github.com/pulp-platform/Deeploy/pull/85)
@@ -62,6 +63,7 @@ This release containing major architectural changes, new platform support, enhan
6263
- Test the correctness of the memory map generated by the tiler
6364
- Add attribute to `VariableBuffer` to distinguish I/Os
6465
- Add proper static memory allocation with finite lifetime for I/Os
66+
- The memory allocation visualization now displays the allocation for each level used
6567
- Tutorial section in the documentation
6668
- Guide on using the debug print topology pass and code transformation
6769
- VSCode configuration files for improved IDE support
@@ -176,6 +178,8 @@ This release containing major architectural changes, new platform support, enhan
176178
### Changed
177179
- Moved PULP SDK from Victor-Jung/pulp-sdk branch deeploy to pulp-platform/pulp-sdk branch main.
178180
- Memory arena buffers are now declared at the beginning of the `InitNetwork` function
181+
- Tiling profiling is now an ON/OFF option that reports the I/O DMA time for each DMA call
182+
- The profiling strings are now `const static`, so that they are stored in `.rodata`
179183
- Adapt the select docker image stage to also select a runner depending on `github.repository`
180184
- Adapt the jobs and reusable workflows to use the selected runner.
181185
- Updated `README.md` description to use a persistent development container

Deeploy/DeeployTypes.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,8 @@ class CodeGenVerbosity:
6969
Encapsulates verbosity options for downstream configuration
7070
"""
7171

72-
tilingProfiling: Optional[str] #: str: Specifies the name of the memory level on which to profile tiling
73-
untiledProfiling: Optional[
74-
bool] = None #: str: Specifies the name of the memory level on which to profile untiled code
72+
tilingProfiling: Optional[bool] = False # Specifies if we should profile the tiling code
73+
untiledProfiling: Optional[bool] = None # Specifies if we should profile the untiled code
7574

7675

7776
_NoVerbosity = CodeGenVerbosity(None)

Deeploy/Targets/PULPOpen/CodeTransformationPasses/PULPClusterTiling.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def apply(self,
4545
name: str,
4646
verbose: CodeGenVerbosity = _NoVerbosity) -> Tuple[NetworkContext, ExecutionBlock]:
4747

48-
if verbose.tilingProfiling == "L2":
48+
if verbose.tilingProfiling:
4949
ctxt, executionBlock = self.profilingSB.apply(ctxt, executionBlock, name)
5050
ctxt, executionBlock = self.profilingDB.apply(ctxt, executionBlock, name)
5151
else:

Deeploy/Targets/PULPOpen/CodeTransformationPasses/PULPClusterTilingDB.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,8 @@ def _tilingLoop(self, ctxt: NetworkContext, executionBlock: ExecutionBlock,
314314
metaInfo = TilingMetaInfo(nodeName = operatorRepresentation['nodeName'] + "_L2",
315315
nodeOps = operatorRepresentation['nodeOps'],
316316
numTiles = len(tilingSchedule.outputLoadSchedule),
317-
tileIdxVar = "TILING_I")
317+
tileIdxVar = "TILING_I",
318+
kernelLevelTiling = True)
318319

319320
newExecutionBlock = self.generateAllTilingCode(executionBlock, metaInfo, ingressDMATransferCalls,
320321
ingressDMAWaitStatements[-1:], ingressDMAUpdates,

Deeploy/Targets/PULPOpen/CodeTransformationPasses/PULPClusterTilingSB.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -627,7 +627,8 @@ def _tilingLoop(self, ctxt: NetworkContext, executionBlock: ExecutionBlock,
627627
metaInfo = TilingMetaInfo(nodeName = operatorRepresentation['nodeName'] + "_L2",
628628
nodeOps = operatorRepresentation['nodeOps'],
629629
numTiles = len(tilingSchedule.outputLoadSchedule),
630-
tileIdxVar = "TILING_I")
630+
tileIdxVar = "TILING_I",
631+
kernelLevelTiling = True)
631632

632633
newExecutionBlock = self.generateAllTilingCode(executionBlock, metaInfo, ingressDMATransferCalls,
633634
ingressDMAWaitStatements, ingressDMAUpdates,

Deeploy/Targets/PULPOpen/CodeTransformationPasses/PULPL3Tiling.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def apply(self,
4545
name: str,
4646
verbose: CodeGenVerbosity = _NoVerbosity) -> Tuple[NetworkContext, ExecutionBlock]:
4747

48-
if verbose.tilingProfiling == "L3":
48+
if verbose.tilingProfiling:
4949
ctxt, executionBlock = self.profilingSB.apply(ctxt, executionBlock, name)
5050
ctxt, executionBlock = self.profilingDB.apply(ctxt, executionBlock, name)
5151
else:

Deeploy/Targets/PULPOpen/CodeTransformationPasses/PULPL3TilingDB.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,8 @@ def _tilingLoop(self, ctxt: NetworkContext, executionBlock: ExecutionBlock,
284284
metaInfo = TilingMetaInfo(nodeName = operatorRepresentation['nodeName'] + "_L3",
285285
nodeOps = operatorRepresentation['nodeOps'],
286286
numTiles = len(tilingSchedule.outputLoadSchedule),
287-
tileIdxVar = "TILING_I")
287+
tileIdxVar = "TILING_I",
288+
kernelLevelTiling = False)
288289

289290
newExecutionBlock = self.generateAllTilingCode(executionBlock, metaInfo, ingressDMATransferCalls,
290291
ingressDMAWaitStatements, ingressDMAUpdates,

Deeploy/Targets/PULPOpen/CodeTransformationPasses/PULPL3TilingSB.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -421,10 +421,11 @@ def _tilingLoop(self, ctxt: NetworkContext, executionBlock: ExecutionBlock,
421421
})
422422
]
423423

424-
metaInfo = TilingMetaInfo(nodeName = operatorRepresentation['nodeName'],
424+
metaInfo = TilingMetaInfo(nodeName = operatorRepresentation['nodeName'] + "_L3",
425425
nodeOps = operatorRepresentation['nodeOps'],
426426
numTiles = len(tilingSchedule.outputLoadSchedule),
427-
tileIdxVar = "TILING_I")
427+
tileIdxVar = "TILING_I",
428+
kernelLevelTiling = False)
428429

429430
newExecutionBlock = self.generateAllTilingCode(executionBlock, metaInfo, ingressDMATransferCalls,
430431
ingressDMAWaitStatements, ingressDMAUpdates,

Deeploy/Targets/Snitch/CodeTransformationPasses/SnitchClusterTiling.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def apply(self,
4141
name: str,
4242
verbose: CodeGenVerbosity = _NoVerbosity) -> Tuple[NetworkContext, ExecutionBlock]:
4343

44-
if verbose.tilingProfiling == "L2":
44+
if verbose.tilingProfiling:
4545
raise NotImplementedError("Profiling not implemented for L2")
4646
# ctxt, executionBlock = self.profilingSB.apply(ctxt, executionBlock, name)
4747
else:

Deeploy/Targets/Snitch/CodeTransformationPasses/SnitchClusterTilingSB.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,8 @@ def _tilingLoop(self, ctxt: NetworkContext, executionBlock: ExecutionBlock,
478478
metaInfo = TilingMetaInfo(nodeName = operatorRepresentation['nodeName'] + "_L2",
479479
nodeOps = operatorRepresentation['nodeOps'],
480480
numTiles = len(tilingSchedule.outputLoadSchedule),
481-
tileIdxVar = "TILING_I")
481+
tileIdxVar = "TILING_I",
482+
kernelLevelTiling = True)
482483

483484
newExecutionBlock = self.generateAllTilingCode(executionBlock, metaInfo, ingressDMATransferCalls,
484485
ingressDMAWaitStatements, ingressDMAUpdates,

0 commit comments

Comments
 (0)