77from Deeploy .DeeployTypes import CodeGenVerbosity , CodeTransformationPass , ExecutionBlock , NetworkContext , _NoVerbosity
88from Deeploy .TilingExtension .AsyncDma import AsyncDma
99from Deeploy .TilingExtension .CodeTransformationPasses .DoubleBufferingTilingCodeGeneration import \
10- DoubleBufferingTilingCodeGeneration , ProfilingDoubleBufferingTilingMixIn
10+ DoubleBufferingTilingCodeGeneration , PerfCounterDoubleBufferingTilingMixIn , ProfilingDoubleBufferingTilingMixIn
1111from Deeploy .TilingExtension .CodeTransformationPasses .SingleBufferingTilingCodeGeneration import \
12- ProfilingSingleBufferingTilingMixIn , SingleBufferingTilingCodeGeneration
12+ PerfCounterSingleBufferingTilingMixIn , ProfilingSingleBufferingTilingMixIn , SingleBufferingTilingCodeGeneration
1313
1414
1515class PULPClusterTilingGenerationSB (SingleBufferingTilingCodeGeneration ):
@@ -28,24 +28,55 @@ class ProfilingPULPClusterTilingGenerationDB(DoubleBufferingTilingCodeGeneration
2828 pass
2929
3030
class PerfCounterPULPClusterTilingGenerationSB(SingleBufferingTilingCodeGeneration,
                                               PerfCounterSingleBufferingTilingMixIn):
    """Single-buffering tiling code generation paired with the performance-counter mix-in."""
34+
35+
class PerfCounterPULPClusterTilingGenerationDB(DoubleBufferingTilingCodeGeneration,
                                               PerfCounterDoubleBufferingTilingMixIn):
    """Double-buffering tiling code generation paired with the performance-counter mix-in."""
39+
40+
class CombinedProfilingPULPClusterTilingGenerationSB(SingleBufferingTilingCodeGeneration,
                                                     ProfilingSingleBufferingTilingMixIn,
                                                     PerfCounterSingleBufferingTilingMixIn):
    """Single-buffering tiling code generation with both the cycle-profiling and performance-counter mix-ins."""
44+
45+
class CombinedProfilingPULPClusterTilingGenerationDB(DoubleBufferingTilingCodeGeneration,
                                                     ProfilingDoubleBufferingTilingMixIn,
                                                     PerfCounterDoubleBufferingTilingMixIn):
    """Double-buffering tiling code generation with both the cycle-profiling and performance-counter mix-ins."""
49+
50+
3151class PULPClusterTiling (CodeTransformationPass ):
3252
33- def __init__ (self , externalMemory : str , localMemory : str , dma : AsyncDma ):
def __init__(self, externalMemory: str, localMemory: str, dma: AsyncDma, usePerfCounters: bool = False):
    """Instantiate every single- and double-buffering generator variant up front.

    Args:
        externalMemory: Passed through unchanged to each generator constructor.
        localMemory: Passed through unchanged to each generator constructor.
        dma: Passed through unchanged to each generator constructor.
        usePerfCounters: Stored on the instance as ``usePerfCounters``.
    """
    self.usePerfCounters = usePerfCounters

    # All generators take the same three constructor arguments.
    generatorArgs = (externalMemory, localMemory, dma)

    # Single-buffering variants (construction order kept as in the original).
    self.SB = PULPClusterTilingGenerationSB(*generatorArgs)
    self.profilingSB = ProfilingPULPClusterTilingGenerationSB(*generatorArgs)
    self.perfCounterSB = PerfCounterPULPClusterTilingGenerationSB(*generatorArgs)
    self.combinedProfilingSB = CombinedProfilingPULPClusterTilingGenerationSB(*generatorArgs)

    # Double-buffering variants.
    self.DB = PULPClusterTilingGenerationDB(*generatorArgs)
    self.profilingDB = ProfilingPULPClusterTilingGenerationDB(*generatorArgs)
    self.perfCounterDB = PerfCounterPULPClusterTilingGenerationDB(*generatorArgs)
    self.combinedProfilingDB = CombinedProfilingPULPClusterTilingGenerationDB(*generatorArgs)
3863
3964 def apply (self ,
4065 ctxt : NetworkContext ,
4166 executionBlock : ExecutionBlock ,
4267 name : str ,
4368 verbose : CodeGenVerbosity = _NoVerbosity ) -> Tuple [NetworkContext , ExecutionBlock ]:
4469
45- if verbose .tilingProfiling :
70+ if self .usePerfCounters and verbose .tilingProfiling :
71+ # Use combined profiling: cycle measurements + performance counter stats
72+ ctxt , executionBlock = self .combinedProfilingSB .apply (ctxt , executionBlock , name )
73+ ctxt , executionBlock = self .combinedProfilingDB .apply (ctxt , executionBlock , name )
74+ elif verbose .tilingProfiling :
75+ # Use cycle profiling only (basic cycle measurements)
4676 ctxt , executionBlock = self .profilingSB .apply (ctxt , executionBlock , name )
4777 ctxt , executionBlock = self .profilingDB .apply (ctxt , executionBlock , name )
4878 else :
79+ # No profiling
4980 ctxt , executionBlock = self .SB .apply (ctxt , executionBlock , name )
5081 ctxt , executionBlock = self .DB .apply (ctxt , executionBlock , name )
5182
0 commit comments