66
77from Deeploy .CommonExtensions .OptimizationPasses .TopologyOptimizationPasses .LoweringOptimizationPasses import \
88 RequantizedGemmToPwPass
9- from Deeploy .DeeployTypes import TopologyOptimizer
9+ from Deeploy .DeeployTypes import NodeMapper , TopologyOptimizer
1010from Deeploy .MemoryLevelExtension .MemoryLevels import MemoryHierarchy , MemoryLevel
11- from Deeploy .Targets .GAP9 .Platform import GAP9ClusterEngine , GAP9ConstantBuffer , GAP9Platform , GAP9StructBuffer , \
12- GAP9TransientBuffer , GAP9VariableBuffer , MemoryGAP9Platform , MemoryGAP9PlatformWrapper
11+ from Deeploy .Targets .GAP9 .Parsers import NE16GEMMParser
12+ from Deeploy .Targets .GAP9 .Platform import GAP9ClusterEngine , GAP9ConstantBuffer , GAP9Mapping , GAP9Platform , \
13+ GAP9StructBuffer , GAP9TransientBuffer , GAP9VariableBuffer , GAP9_FloatGEMMMapper , GAP9_GEMMDequantMapper , \
14+ GAP9_GEMMMapper , GAP9_MatrixVecMapper , GAP9_TallGEMMMapper , MemoryGAP9Platform , MemoryGAP9PlatformWrapper
15+ from Deeploy .Targets .GAP9 .Tiler import GAP9NE16GEMMInt32TilingReadyBindings , GAP9NE16RQSGEMMTilingReadyBindings
16+ from Deeploy .Targets .GAP9 .TopologyOptimizationPasses .Passes import NE16AdjustGEMMWeightLayoutPass
17+ from Deeploy .Targets .Generic .Layers import GEMMLayer
18+ from Deeploy .Targets .Generic .Parsers import GEMMParser
1319from Deeploy .Targets .NE16 .Engine import NE16Engine
20+ from Deeploy .Targets .PULPOpen .Layers import PULPRQSGEMMLayer
1421from Deeploy .Targets .PULPOpen .Platform import PULPOptimizer
1522
# ---------------------------------------------------------------------------
# NE16 GEMM node mappers.
# Each one pairs a GEMM parser with a set of NE16 tiling-ready bindings. They
# are registered on the cluster engine and use the GAP9 SDK NE16 kernels.
# ---------------------------------------------------------------------------
GAP9_NE16GEMMMapper = NodeMapper(NE16GEMMParser(), GAP9NE16RQSGEMMTilingReadyBindings)
GAP9_NE16GEMMInt32Mapper = NodeMapper(GEMMParser(), GAP9NE16GEMMInt32TilingReadyBindings)

# Cluster mapping: start from a shallow copy of the stock GAP9 mapping so the
# shared GAP9Mapping object is left untouched, then (re)bind the two GEMM
# operators to layers whose mapper lists start with the NE16 mapper.
# NOTE(review): presumably mappers are tried in list order, making the other
# entries fallbacks -- confirm against the layer implementation.
_NE16ClusterMapping = dict(GAP9Mapping)
_NE16ClusterMapping.update(
    RequantizedGemm = PULPRQSGEMMLayer([
        GAP9_NE16GEMMMapper,
        GAP9_MatrixVecMapper,
        GAP9_TallGEMMMapper,
        GAP9_GEMMMapper,
    ]),
    Gemm = GEMMLayer([
        GAP9_NE16GEMMInt32Mapper,
        GAP9_FloatGEMMMapper,
        GAP9_GEMMDequantMapper,
    ]),
)

# Headers included for the cluster engine when NE16 GEMM kernels are in use.
# NOTE(review): an earlier revision of this file left out
# CNN_BasicKernels_NE16.h and ne16_utils.h on purpose, because they define the
# same NE16_REG_* macros as pulp-nnx's ne16_task_defs.h and that broke -Werror
# builds. Confirm the redefinition clash is resolved before relying on this.
_NE16ClusterIncludeList = [
    # baseline cluster headers
    "pmsis.h",
    "DeeployGAP9Math.h",
    "pulp_nn_kernels.h",
    "DeeployMchan.h",
    "CNN_BasicKernels_fp32.h",
    # GAP9 SDK NE16 headers needed by the GEMM kernels
    "CNN_BasicKernels_NE16.h",
    "CNN_Copy.h",
    "ne16_utils.h",
]

# Topology optimizer: every PULP pass first, then the NE16 GEMM weight-layout
# adjustment, and finally the requantized-GEMM-to-pointwise rewrite.
NE16Optimizer = TopologyOptimizer(
    list(PULPOptimizer.passes) + [
        NE16AdjustGEMMWeightLayoutPass(),
        RequantizedGemmToPwPass(),
    ],
    name = "NE16Optimizer",
)
2044
@@ -28,15 +52,10 @@ def __init__(self,
2852 structBuffer = GAP9StructBuffer ,
2953 transientBuffer = GAP9TransientBuffer ) -> None :
3054 if engines is None :
31- # Drop SDK NE16 headers from the cluster engine include list so the
32- # generated Network.c does not pull in CNN_BasicKernels_NE16.h /
33- # ne16_utils.h alongside pulp-nnx's ne16_task_defs.h
34- # (NE16_REG_* macros are defined in both, causing -Werror redefs).
3555 cluster = GAP9ClusterEngine (
3656 "GAP9Cluster" ,
37- includeList = [
38- "pmsis.h" , "DeeployGAP9Math.h" , "pulp_nn_kernels.h" , "DeeployMchan.h" , "CNN_BasicKernels_fp32.h"
39- ],
57+ Mapping = _NE16ClusterMapping ,
58+ includeList = _NE16ClusterIncludeList ,
4059 )
4160 engines = [NE16Engine ("NE16" ), cluster ]
4261 super ().__init__ (engines , variableBuffer , constantBuffer , structBuffer , transientBuffer )
@@ -54,15 +73,10 @@ def __init__(self,
5473 structBuffer = GAP9StructBuffer ,
5574 transientBuffer = GAP9TransientBuffer ) -> None :
5675 if engines is None :
57- # Drop SDK NE16 headers from the cluster engine include list so the
58- # generated Network.c does not pull in CNN_BasicKernels_NE16.h /
59- # ne16_utils.h alongside pulp-nnx's ne16_task_defs.h
60- # (NE16_REG_* macros are defined in both, causing -Werror redefs).
6176 cluster = GAP9ClusterEngine (
6277 "GAP9Cluster" ,
63- includeList = [
64- "pmsis.h" , "DeeployGAP9Math.h" , "pulp_nn_kernels.h" , "DeeployMchan.h" , "CNN_BasicKernels_fp32.h"
65- ],
78+ Mapping = _NE16ClusterMapping ,
79+ includeList = _NE16ClusterIncludeList ,
6680 )
6781 engines = [NE16Engine ("NE16" ), cluster ]
6882 super ().__init__ (memoryHierarchy , defaultTargetMemoryLevel , engines , variableBuffer , constantBuffer ,
0 commit comments