|
5 | 5 | import numpy as np |
6 | 6 | import onnx_graphsurgeon as gs |
7 | 7 |
|
| 8 | +from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \ |
| 9 | + RemoveEmptyConvBiasPass, RemoveOnlySingletonReduceMeanPass |
8 | 10 | from Deeploy.DeeployTypes import ConstantBuffer, DeploymentEngine, DeploymentPlatform, NetworkContext, NodeMapper, \ |
9 | | - NodeTemplate, StructBuffer, TransientBuffer, VariableBuffer |
| 11 | + NodeTemplate, StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer |
10 | 12 | from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel |
11 | 13 | from Deeploy.MemoryLevelExtension.NetworkDeployers.MemoryLevelDeployer import MemoryPlatform, MemoryPlatformWrapper |
| 14 | +from Deeploy.Targets.GAP9.Parsers import NE16GEMMParser |
12 | 15 | from Deeploy.Targets.GAP9.Templates import AllocateTemplate, FreeTemplate |
13 | 16 | # Import GAP9-specific tiler bindings |
14 | | -from Deeploy.Targets.GAP9.Tiler import GAP9AddTilingReadyBindings, GAP9ConcatTilingReadyBindings, \ |
15 | | - GAP9Conv2DTilingReadyBindings, GAP9DWConv2DTilingReadyBindings, GAP9FlattenTilingReadyBindings, \ |
16 | | - GAP9FPGELUTilingReadyBindings, GAP9FPGEMMTilingReadyBindings, GAP9GatherTilingReadyBindings, \ |
17 | | - GAP9iHardswishTilingReadyBindings, GAP9iRMSNormTilingReadyBindings, GAP9iRQSGELUTilingReadyBindings, \ |
18 | | - GAP9LayernormTilingReadyBindings, GAP9MatMulTilingReadyBindings, GAP9MaxPool2DTilingReadyBindings, \ |
19 | | - GAP9MulTilingReadyBindings, GAP9ReduceSumTilingReadyBindings, GAP9ReluTilingReadyBindings, \ |
| 17 | +from Deeploy.Targets.GAP9.Tiler import DeQuantTilingReadyBindings, GAP9AddTilingReadyBindings, \ |
| 18 | + GAP9ConcatTilingReadyBindings, GAP9Conv2DTilingReadyBindings, GAP9DWConv2DTilingReadyBindings, \ |
| 19 | + GAP9FlattenTilingReadyBindings, GAP9FPGELUTilingReadyBindings, GAP9FPGEMMTilingReadyBindings, \ |
| 20 | + GAP9GatherTilingReadyBindings, GAP9iHardswishTilingReadyBindings, GAP9iRMSNormTilingReadyBindings, \ |
| 21 | + GAP9iRQSGELUTilingReadyBindings, GAP9LayernormTilingReadyBindings, GAP9MatMulTilingReadyBindings, \ |
| 22 | + GAP9MaxPool2DTilingReadyBindings, GAP9MulTilingReadyBindings, GAP9NE16GEMMInt32TilingReadyBindings, \ |
| 23 | + GAP9NE16RQSGEMMTilingReadyBindings, GAP9ReduceSumTilingReadyBindings, GAP9ReluTilingReadyBindings, \ |
20 | 24 | GAP9RQAddTilingReadyBindings, GAP9RQSConv2DTilingReadyBindings, GAP9RQSDWConv2DTilingReadyBindings, \ |
21 | 25 | GAP9RQSGEMMTilingReadyBindings, GAP9RQSiHardswishTilingReadyBindings, GAP9RQSMatrixVecTilingReadyBindings, \ |
22 | 26 | GAP9RQSTallGEMMTilingReadyBindings, GAP9RQSTilingReadyBindings, GAP9SGDTilingReadyBindings, \ |
23 | 27 | GAP9SoftmaxCrossEntropyGradTilingReadyBindings, GAP9SoftmaxCrossEntropyTilingReadyBindings, \ |
24 | 28 | GAP9SoftmaxGradTilingReadyBindings, GAP9SoftmaxTilingReadyBindings, GAP9TransposeTilingReadyBindings, \ |
25 | | - GAP9UniformRQSTilingReadyBindings |
| 29 | + GAP9UniformRQSTilingReadyBindings, QuantTilingReadyBindings |
| 30 | +from Deeploy.Targets.GAP9.TopologyOptimizationPasses.Passes import NE16AdjustGEMMWeightLayoutPass |
26 | 31 | from Deeploy.Targets.Generic.Bindings import BasicGEMMBindings, BasicPad1DBindings, BasicPad2DBindings, \ |
27 | 32 | BasicRQIntegerDivBinding |
28 | 33 | from Deeploy.Targets.Generic.Layers import AddLayer, ConcatLayer, ConvLayer, GatherLayer, GELULayer, GEMMLayer, \ |
|
37 | 42 | SoftmaxCrossEntropyLossGradParser, SoftmaxCrossEntropyLossParser, SoftmaxGradParser, SoftmaxParser, \ |
38 | 43 | TransposeParser, UniformRequantShiftParser, UnsqueezeParser, iHardswishParser, iRMSNormParser, iSoftmaxParser |
39 | 44 | from Deeploy.Targets.Generic.Templates import AllocateTemplate as BasicAllocateTemplate |
40 | | -from Deeploy.Targets.PULPOpen.Bindings import BasicDequantBindings, BasicQuantBindings, PULPDMASliceBindings, \ |
41 | | - PULPDWConv1DBinding, PULPReduceMeanBindings, PULPRQSConv1DBindings, PULPSliceBindings |
| 45 | +from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, DequantQuantMergePass, \ |
| 46 | + IntegerDivRequantMergePass, MatMulAddMergePass, MergeConstAddAndRequantPass, MergeTrueIntegerDivRequantShiftPass, \ |
| 47 | + QuantPatternPass, RQSSplitPass, SkipEmptyConcatPass, SkipUnityRequantPass, iGELURequantMergePass, \ |
| 48 | + iHardswishRequantMergePass |
| 49 | +from Deeploy.Targets.PULPOpen.Bindings import BasicDequantBindings, BasicQuantBindings, PULPConv1DBinding, \ |
| 50 | + PULPDMASliceBindings, PULPDWConv1DBinding, PULPReduceMeanBindings, PULPRQSConv1DBindings, PULPSliceBindings |
42 | 51 | from Deeploy.Targets.PULPOpen.Layers import PULPRQSConvLayer, PULPRQSGEMMLayer |
43 | 52 | from Deeploy.Targets.PULPOpen.Parsers import PULPConv1DParser, PULPConv2DParser, PULPDWConv1DParser, \ |
44 | 53 | PULPDWConv2DParser, PULPFPConv2DParser, PULPFPDWConv2DParser, PULPGEMMParser, PULPMatrixVecParser, \ |
45 | 54 | PULPTallGEMMParser |
| 55 | +from Deeploy.Targets.PULPOpen.TopologyOptimizationPasses.Passes import PULPConvRequantMergePass, \ |
| 56 | + PULPGEMMRequantMergePass, PULPMatMulRequantMergePass |
46 | 57 |
|
47 | 58 | # Create GAP9-specific NodeMappers |
48 | 59 | GAP9_RQAddMapper = NodeMapper(RQAddParser(), GAP9RQAddTilingReadyBindings) |
|
90 | 101 | GAP9_SoftmaxCrossEntropyLossGradMapper = NodeMapper(SoftmaxCrossEntropyLossGradParser(), |
91 | 102 | GAP9SoftmaxCrossEntropyGradTilingReadyBindings) |
92 | 103 | GAP9_SGDMapper = NodeMapper(SGDParser(), GAP9SGDTilingReadyBindings) |
93 | | -GAP9_QuantMapper = NodeMapper(QuantParser(), BasicQuantBindings) |
94 | | -GAP9_DequantMapper = NodeMapper(DequantParser(), BasicDequantBindings) |
| 104 | +GAP9_QuantMapper = NodeMapper(QuantParser(), QuantTilingReadyBindings) |
| 105 | +GAP9_DequantMapper = NodeMapper(DequantParser(), DeQuantTilingReadyBindings) |
95 | 106 | GAP9_GEMMDequantMapper = NodeMapper(PULPGEMMParser(), BasicGEMMBindings) |
| 107 | +GAP9_NE16GEMMMapper = NodeMapper(NE16GEMMParser(), GAP9NE16RQSGEMMTilingReadyBindings) |
| 108 | +GAP9_NE16GEMMInt32Mapper = NodeMapper(GEMMParser(), GAP9NE16GEMMInt32TilingReadyBindings) |
| 109 | + |
| 110 | +GAP9Optimizer = TopologyOptimizer( |
| 111 | + [ |
| 112 | + QuantPatternPass(), |
| 113 | + DequantPatternPass(), |
| 114 | + DequantQuantMergePass(), |
| 115 | + MatMulAddMergePass(), |
| 116 | + SkipEmptyConcatPass(), |
| 117 | + SkipUnityRequantPass(previous_op_regex = "Concat", num_inputs = 2), |
| 118 | + SkipUnityRequantPass(previous_op_regex = "Reshape|Transpose", num_inputs = 1), |
| 119 | + SkipUnityRequantPass(previous_op_regex = "Reshape|Transpose", num_inputs = 1), |
| 120 | + RQSSplitPass(), |
| 121 | + MergeTrueIntegerDivRequantShiftPass(), |
| 122 | + IntegerDivRequantMergePass(), |
| 123 | + iGELURequantMergePass(), |
| 124 | + iHardswishRequantMergePass(), |
| 125 | + PULPConvRequantMergePass(), |
| 126 | + MergeConstAddAndRequantPass(), |
| 127 | + PULPGEMMRequantMergePass(), |
| 128 | + PULPMatMulRequantMergePass(), |
| 129 | + # PULPAddRequantMergePass(), |
| 130 | + RemoveEmptyConvBiasPass(), |
| 131 | + RemoveOnlySingletonReduceMeanPass(), |
| 132 | + NE16AdjustGEMMWeightLayoutPass(), |
| 133 | + ], |
| 134 | + name = "GAP9Optimizer") |
96 | 135 |
|
97 | 136 | # GAP9-specific mapping using ClDma |
98 | 137 | GAP9Mapping = { |
|
101 | 140 | 'RequantizedConv': |
102 | 141 | PULPRQSConvLayer([GAP9_Conv2DMapper, GAP9_DWConv2DMapper, GAP9_Conv1DMapper, GAP9_DWConv1DMapper]), |
103 | 142 | 'RequantizedGemm': |
104 | | - PULPRQSGEMMLayer([GAP9_MatrixVecMapper, GAP9_TallGEMMMapper, GAP9_GEMMMapper]), |
| 143 | + PULPRQSGEMMLayer([GAP9_NE16GEMMMapper, GAP9_MatrixVecMapper, GAP9_TallGEMMMapper, GAP9_GEMMMapper]), |
105 | 144 | 'Gemm': |
106 | | - GEMMLayer([GAP9_FloatGEMMMapper, GAP9_GEMMDequantMapper]), |
| 145 | + GEMMLayer([GAP9_NE16GEMMInt32Mapper, GAP9_FloatGEMMMapper, GAP9_GEMMDequantMapper]), |
107 | 146 | 'Gelu': |
108 | 147 | GELULayer([GAP9_GELUMapper]), |
109 | 148 | 'LayerNormalization': |
@@ -244,7 +283,10 @@ class GAP9StructBuffer(StructBuffer): |
244 | 283 | deallocTemplate = NodeTemplate("") |
245 | 284 |
|
246 | 285 |
|
247 | | -_includeList = ["pmsis.h", "DeeployGAP9Math.h", "pulp_nn_kernels.h", "DeeployMchan.h"] |
| 286 | +_includeList = [ |
| 287 | + "pmsis.h", "DeeployGAP9Math.h", "pulp_nn_kernels.h", "DeeployMchan.h", "CNN_BasicKernels_fp32.h", |
| 288 | + "CNN_BasicKernels_NE16.h", "CNN_Copy.h", "ne16_utils.h" |
| 289 | +] |
248 | 290 |
|
249 | 291 |
|
250 | 292 | class GAP9ClusterEngine(DeploymentEngine): |
|
0 commit comments