|
2 | 2 | # |
3 | 3 | # SPDX-License-Identifier: Apache-2.0 |
4 | 4 |
|
5 | | -from typing import List, Union |
| 5 | +from typing import Dict, List, Optional, Tuple, Union |
6 | 6 |
|
7 | 7 | from ortools.constraint_solver.pywrapcp import IntVar |
8 | 8 |
|
9 | 9 | from Deeploy.DeeployTypes import NetworkContext, SubGraph, TransientBuffer |
| 10 | +from Deeploy.TilingExtension.MemoryConstraints import PatternMemoryConstraints |
| 11 | +from Deeploy.TilingExtension.MemoryScheduler import MemoryScheduler |
10 | 12 | from Deeploy.TilingExtension.TilerExtension import Tiler |
11 | 13 | from Deeploy.TilingExtension.TilerModel import TilerModel |
12 | 14 |
|
@@ -43,3 +45,28 @@ class SBTiler(Tiler): |
43 | 45 | def multiBufferStrategy(self, tilerModel: TilerModel, ctxt: NetworkContext, pattern: SubGraph, path: List[str], |
44 | 46 | hop: str, tensorName: str) -> Union[int, IntVar]: |
45 | 47 | return 1 |
| 48 | + |
| 49 | + |
class TrainingMemoryScheduler(MemoryScheduler):
    """MemoryScheduler variant for training networks.

    After the base class has computed tensor lifetimes, every tensor whose
    buffer is flagged ``is_input`` gets its lifetime overwritten with
    ``(0, len(patternMemoryConstraint.nodeConstraints))``. The intent is that
    forward-pass inputs remain live during the backward pass.
    """

    def _calculateLifetimes(
            self, ctxt: NetworkContext, patternMemoryConstraint: PatternMemoryConstraints,
            memoryLevel: str) -> Tuple[Dict[str, Tuple[int, int]], Dict]:
        """Compute tensor lifetimes, pinning input tensors to the full span.

        Args:
            ctxt: Context used to look up the buffer behind each tensor name.
            patternMemoryConstraint: Constraints of the pattern being
                scheduled; the length of its ``nodeConstraints`` is used as
                the end of the pinned lifetime.
            memoryLevel: Forwarded unchanged to the base implementation.

        Returns:
            The ``(lifetimeMap, tensorMap)`` pair produced by the base class,
            with the lifetime map updated in place for input tensors.
        """
        lifetimes, tensors = super()._calculateLifetimes(ctxt, patternMemoryConstraint, memoryLevel)

        # NOTE(review): presumably lifetime bounds are step indices into
        # nodeConstraints -- confirm against MemoryScheduler._calculateLifetimes.
        fullSpan = (0, len(patternMemoryConstraint.nodeConstraints))

        # Only values of existing keys are replaced, so the dict can be
        # updated while it is being iterated.
        for name in lifetimes:
            if not ctxt.lookup(name).is_input:
                continue
            lifetimes[name] = fullSpan

        return lifetimes, tensors
| 69 | + |
| 70 | + |
class TrainingSBTiler(SBTiler):
    """SBTiler variant whose memory scheduler class is TrainingMemoryScheduler.

    NOTE(review): presumably the base ``Tiler`` instantiates
    ``memorySchedulerClass`` to build its schedulers -- confirm in
    ``TilerExtension``.
    """

    # Swap in the scheduler that keeps input tensors live for the whole schedule.
    memorySchedulerClass = TrainingMemoryScheduler
0 commit comments