Skip to content

Commit 11a641b

Browse files
committed
[NE16] isolate NE16 GEMM mappers and headers from shared GAP9 platform
NE16-specific includes (CNN_BasicKernels_NE16.h, ne16_utils.h), GEMM mappers, and NE16AdjustGEMMWeightLayoutPass were added to the shared GAP9 platform code, causing plain GAP9 builds to fail with missing NE16 headers. Move them into NE16/Platform.py so only GAP9_w_NE16 picks them up.
1 parent 2373ff9 commit 11a641b

2 files changed

Lines changed: 36 additions & 28 deletions

File tree

Deeploy/Targets/GAP9/Platform.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,23 +11,21 @@
1111
NodeTemplate, StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer
1212
from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel
1313
from Deeploy.MemoryLevelExtension.NetworkDeployers.MemoryLevelDeployer import MemoryPlatform, MemoryPlatformWrapper
14-
from Deeploy.Targets.GAP9.Parsers import NE16GEMMParser
1514
from Deeploy.Targets.GAP9.Templates import AllocateTemplate, FreeTemplate
1615
# Import GAP9-specific tiler bindings
1716
from Deeploy.Targets.GAP9.Tiler import DeQuantTilingReadyBindings, GAP9AddTilingReadyBindings, \
1817
GAP9ConcatTilingReadyBindings, GAP9Conv2DTilingReadyBindings, GAP9DWConv2DTilingReadyBindings, \
1918
GAP9FlattenTilingReadyBindings, GAP9FPGELUTilingReadyBindings, GAP9FPGEMMTilingReadyBindings, \
2019
GAP9GatherTilingReadyBindings, GAP9iHardswishTilingReadyBindings, GAP9iRMSNormTilingReadyBindings, \
2120
GAP9iRQSGELUTilingReadyBindings, GAP9LayernormTilingReadyBindings, GAP9MatMulTilingReadyBindings, \
22-
GAP9MaxPool2DTilingReadyBindings, GAP9MulTilingReadyBindings, GAP9NE16GEMMInt32TilingReadyBindings, \
23-
GAP9NE16RQSGEMMTilingReadyBindings, GAP9ReduceSumTilingReadyBindings, GAP9ReluTilingReadyBindings, \
21+
GAP9MaxPool2DTilingReadyBindings, GAP9MulTilingReadyBindings, GAP9ReduceSumTilingReadyBindings, \
22+
GAP9ReluTilingReadyBindings, \
2423
GAP9RQAddTilingReadyBindings, GAP9RQSConv2DTilingReadyBindings, GAP9RQSDWConv2DTilingReadyBindings, \
2524
GAP9RQSGEMMTilingReadyBindings, GAP9RQSiHardswishTilingReadyBindings, GAP9RQSMatrixVecTilingReadyBindings, \
2625
GAP9RQSTallGEMMTilingReadyBindings, GAP9RQSTilingReadyBindings, GAP9SGDTilingReadyBindings, \
2726
GAP9SoftmaxCrossEntropyGradTilingReadyBindings, GAP9SoftmaxCrossEntropyTilingReadyBindings, \
2827
GAP9SoftmaxGradTilingReadyBindings, GAP9SoftmaxTilingReadyBindings, GAP9TransposeTilingReadyBindings, \
2928
GAP9UniformRQSTilingReadyBindings, QuantTilingReadyBindings
30-
from Deeploy.Targets.GAP9.TopologyOptimizationPasses.Passes import NE16AdjustGEMMWeightLayoutPass
3129
from Deeploy.Targets.Generic.Bindings import BasicGEMMBindings, BasicPad1DBindings, BasicPad2DBindings, \
3230
BasicRQIntegerDivBinding
3331
from Deeploy.Targets.Generic.Layers import AddLayer, ConcatLayer, ConvLayer, GatherLayer, GELULayer, GEMMLayer, \
@@ -104,8 +102,6 @@
104102
GAP9_QuantMapper = NodeMapper(QuantParser(), QuantTilingReadyBindings)
105103
GAP9_DequantMapper = NodeMapper(DequantParser(), DeQuantTilingReadyBindings)
106104
GAP9_GEMMDequantMapper = NodeMapper(PULPGEMMParser(), BasicGEMMBindings)
107-
GAP9_NE16GEMMMapper = NodeMapper(NE16GEMMParser(), GAP9NE16RQSGEMMTilingReadyBindings)
108-
GAP9_NE16GEMMInt32Mapper = NodeMapper(GEMMParser(), GAP9NE16GEMMInt32TilingReadyBindings)
109105

110106
GAP9Optimizer = TopologyOptimizer(
111107
[
@@ -129,7 +125,6 @@
129125
# PULPAddRequantMergePass(),
130126
RemoveEmptyConvBiasPass(),
131127
RemoveOnlySingletonReduceMeanPass(),
132-
NE16AdjustGEMMWeightLayoutPass(),
133128
],
134129
name = "GAP9Optimizer")
135130

@@ -140,9 +135,9 @@
140135
'RequantizedConv':
141136
PULPRQSConvLayer([GAP9_Conv2DMapper, GAP9_DWConv2DMapper, GAP9_Conv1DMapper, GAP9_DWConv1DMapper]),
142137
'RequantizedGemm':
143-
PULPRQSGEMMLayer([GAP9_NE16GEMMMapper, GAP9_MatrixVecMapper, GAP9_TallGEMMMapper, GAP9_GEMMMapper]),
138+
PULPRQSGEMMLayer([GAP9_MatrixVecMapper, GAP9_TallGEMMMapper, GAP9_GEMMMapper]),
144139
'Gemm':
145-
GEMMLayer([GAP9_NE16GEMMInt32Mapper, GAP9_FloatGEMMMapper, GAP9_GEMMDequantMapper]),
140+
GEMMLayer([GAP9_FloatGEMMMapper, GAP9_GEMMDequantMapper]),
146141
'Gelu':
147142
GELULayer([GAP9_GELUMapper]),
148143
'LayerNormalization':
@@ -284,8 +279,7 @@ class GAP9StructBuffer(StructBuffer):
284279

285280

286281
_includeList = [
287-
"pmsis.h", "DeeployGAP9Math.h", "pulp_nn_kernels.h", "DeeployMchan.h", "CNN_BasicKernels_fp32.h",
288-
"CNN_BasicKernels_NE16.h", "CNN_Copy.h", "ne16_utils.h"
282+
"pmsis.h", "DeeployGAP9Math.h", "pulp_nn_kernels.h", "DeeployMchan.h", "CNN_BasicKernels_fp32.h", "CNN_Copy.h"
289283
]
290284

291285

Deeploy/Targets/NE16/Platform.py

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,39 @@
66

77
from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \
88
RequantizedGemmToPwPass
9-
from Deeploy.DeeployTypes import TopologyOptimizer
9+
from Deeploy.DeeployTypes import NodeMapper, TopologyOptimizer
1010
from Deeploy.MemoryLevelExtension.MemoryLevels import MemoryHierarchy, MemoryLevel
11-
from Deeploy.Targets.GAP9.Platform import GAP9ClusterEngine, GAP9ConstantBuffer, GAP9Platform, GAP9StructBuffer, \
12-
GAP9TransientBuffer, GAP9VariableBuffer, MemoryGAP9Platform, MemoryGAP9PlatformWrapper
11+
from Deeploy.Targets.GAP9.Parsers import NE16GEMMParser
12+
from Deeploy.Targets.GAP9.Platform import GAP9ClusterEngine, GAP9ConstantBuffer, GAP9Mapping, GAP9Platform, \
13+
GAP9StructBuffer, GAP9TransientBuffer, GAP9VariableBuffer, GAP9_FloatGEMMMapper, GAP9_GEMMDequantMapper, \
14+
GAP9_GEMMMapper, GAP9_MatrixVecMapper, GAP9_TallGEMMMapper, MemoryGAP9Platform, MemoryGAP9PlatformWrapper
15+
from Deeploy.Targets.GAP9.Tiler import GAP9NE16GEMMInt32TilingReadyBindings, GAP9NE16RQSGEMMTilingReadyBindings
16+
from Deeploy.Targets.GAP9.TopologyOptimizationPasses.Passes import NE16AdjustGEMMWeightLayoutPass
17+
from Deeploy.Targets.Generic.Layers import GEMMLayer
18+
from Deeploy.Targets.Generic.Parsers import GEMMParser
1319
from Deeploy.Targets.NE16.Engine import NE16Engine
20+
from Deeploy.Targets.PULPOpen.Layers import PULPRQSGEMMLayer
1421
from Deeploy.Targets.PULPOpen.Platform import PULPOptimizer
1522

23+
# NE16-specific GEMM mappers (run on the cluster engine using GAP9 SDK NE16 kernels)
24+
GAP9_NE16GEMMMapper = NodeMapper(NE16GEMMParser(), GAP9NE16RQSGEMMTilingReadyBindings)
25+
GAP9_NE16GEMMInt32Mapper = NodeMapper(GEMMParser(), GAP9NE16GEMMInt32TilingReadyBindings)
26+
27+
# Build a cluster mapping that includes the NE16 GEMM mappers
28+
_NE16ClusterMapping = dict(GAP9Mapping)
29+
_NE16ClusterMapping['RequantizedGemm'] = PULPRQSGEMMLayer(
30+
[GAP9_NE16GEMMMapper, GAP9_MatrixVecMapper, GAP9_TallGEMMMapper, GAP9_GEMMMapper])
31+
_NE16ClusterMapping['Gemm'] = GEMMLayer([GAP9_NE16GEMMInt32Mapper, GAP9_FloatGEMMMapper, GAP9_GEMMDequantMapper])
32+
33+
# NE16 include list for the cluster engine: add SDK NE16 headers for GEMM kernels
34+
_NE16ClusterIncludeList = [
35+
"pmsis.h", "DeeployGAP9Math.h", "pulp_nn_kernels.h", "DeeployMchan.h", "CNN_BasicKernels_fp32.h",
36+
"CNN_BasicKernels_NE16.h", "CNN_Copy.h", "ne16_utils.h"
37+
]
38+
1639
NE16Optimizer = TopologyOptimizer([
1740
*PULPOptimizer.passes,
41+
NE16AdjustGEMMWeightLayoutPass(),
1842
RequantizedGemmToPwPass(),
1943
], name = "NE16Optimizer")
2044

@@ -28,15 +52,10 @@ def __init__(self,
2852
structBuffer = GAP9StructBuffer,
2953
transientBuffer = GAP9TransientBuffer) -> None:
3054
if engines is None:
31-
# Drop SDK NE16 headers from the cluster engine include list so the
32-
# generated Network.c does not pull in CNN_BasicKernels_NE16.h /
33-
# ne16_utils.h alongside pulp-nnx's ne16_task_defs.h
34-
# (NE16_REG_* macros are defined in both, causing -Werror redefs).
3555
cluster = GAP9ClusterEngine(
3656
"GAP9Cluster",
37-
includeList = [
38-
"pmsis.h", "DeeployGAP9Math.h", "pulp_nn_kernels.h", "DeeployMchan.h", "CNN_BasicKernels_fp32.h"
39-
],
57+
Mapping = _NE16ClusterMapping,
58+
includeList = _NE16ClusterIncludeList,
4059
)
4160
engines = [NE16Engine("NE16"), cluster]
4261
super().__init__(engines, variableBuffer, constantBuffer, structBuffer, transientBuffer)
@@ -54,15 +73,10 @@ def __init__(self,
5473
structBuffer = GAP9StructBuffer,
5574
transientBuffer = GAP9TransientBuffer) -> None:
5675
if engines is None:
57-
# Drop SDK NE16 headers from the cluster engine include list so the
58-
# generated Network.c does not pull in CNN_BasicKernels_NE16.h /
59-
# ne16_utils.h alongside pulp-nnx's ne16_task_defs.h
60-
# (NE16_REG_* macros are defined in both, causing -Werror redefs).
6176
cluster = GAP9ClusterEngine(
6277
"GAP9Cluster",
63-
includeList = [
64-
"pmsis.h", "DeeployGAP9Math.h", "pulp_nn_kernels.h", "DeeployMchan.h", "CNN_BasicKernels_fp32.h"
65-
],
78+
Mapping = _NE16ClusterMapping,
79+
includeList = _NE16ClusterIncludeList,
6680
)
6781
engines = [NE16Engine("NE16"), cluster]
6882
super().__init__(memoryHierarchy, defaultTargetMemoryLevel, engines, variableBuffer, constantBuffer,

0 commit comments

Comments
 (0)