Skip to content

Commit d4a8bac

Browse files
Xeratec and claude committed
Update profiling parser for pre/post-kernel cycles
- ProfilingTraceParser: parse the new "Pre-Kernel"/"Post-Kernel" trace format (replacing Input/Output DMA) and track per-layer tile count.
- profiling2csv: report pre/post-kernel cycles, total cycles, ops/cycle, and tile count; append a "Total" row; default the output CSV path to the trace path with a .csv suffix.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent ef015e9 commit d4a8bac

2 files changed

Lines changed: 44 additions & 19 deletions

File tree

DeeployTest/profiling2csv.py

Lines changed: 32 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -16,40 +16,62 @@ class LayerInfo:
1616
bufferingMode: str
1717
ops: int
1818
totalKernelCycles: int
19-
totalInputDmaCycles: int
20-
totalOutputDmaCycles: int
19+
totalPreKernelCycles: int
20+
totalPostKernelCycles: int
21+
totalCycles: int
22+
opsperCycle: float
23+
tiles: int = 0
2124

2225

2326
def layerInfoFromProfiling(name: str, profiling: LayerProfiling) -> LayerInfo:
27+
totalKernelCycles = sum(profiling.kernelCycles)
28+
totalPreKernelCycles = sum(profiling.preKernelCycles)
29+
totalPostKernelCycles = sum(profiling.postKernelCycles)
30+
totalCycles = totalKernelCycles + totalPreKernelCycles + totalPostKernelCycles
31+
opsperCycle = profiling.ops / totalCycles if totalCycles > 0 else 0.0
2432
return LayerInfo(name = name,
2533
bufferingMode = profiling.bufferingMode,
2634
ops = profiling.ops,
27-
totalKernelCycles = sum(profiling.kernelCycles),
28-
totalInputDmaCycles = sum(profiling.inputDmaCycles),
29-
totalOutputDmaCycles = sum(profiling.outputDmaCycles))
35+
totalKernelCycles = totalKernelCycles,
36+
totalPreKernelCycles = totalPreKernelCycles,
37+
totalPostKernelCycles = totalPostKernelCycles,
38+
totalCycles = totalCycles,
39+
opsperCycle = opsperCycle,
40+
tiles = profiling.tiles)
3041

3142

3243
if __name__ == "__main__":
3344
parser = argparse.ArgumentParser(description = 'Parse and visualize profiling results')
3445
parser.add_argument('trace_path', type = str, help = 'Path to the profiling trace file')
35-
parser.add_argument('-o',
36-
'--output_path',
37-
type = str,
38-
default = "profile.csv",
39-
help = 'Path to the output CSV file')
46+
parser.add_argument('-o', '--output_path', type = str, help = 'Path to the output CSV file')
4047
parser.add_argument('--table',
4148
action = 'store_true',
4249
default = False,
4350
help = 'Print a table of the profiled results.')
4451
args = parser.parse_args()
4552

53+
if not args.output_path:
54+
args.output_path = args.trace_path.rsplit('.', 1)[0] + '.csv'
55+
4656
profilingParser = ProfilingTraceParser()
4757

4858
with open(args.trace_path, "r") as f:
4959
layerProfilings = profilingParser.parse(f.read())
5060

5161
fieldnames = [field.name for field in dataclasses.fields(LayerInfo)]
5262
layerInfos = [layerInfoFromProfiling(name, profiling) for name, profiling in layerProfilings.items()]
63+
layerInfos += [
64+
LayerInfo(name = "Total",
65+
bufferingMode = "",
66+
ops = sum(info.ops for info in layerInfos),
67+
totalKernelCycles = sum(info.totalKernelCycles for info in layerInfos),
68+
totalPreKernelCycles = sum(info.totalPreKernelCycles for info in layerInfos),
69+
totalPostKernelCycles = sum(info.totalPostKernelCycles for info in layerInfos),
70+
totalCycles = sum(info.totalCycles for info in layerInfos),
71+
opsperCycle = sum(info.ops for info in layerInfos) /
72+
sum(info.totalCycles for info in layerInfos) if sum(
73+
info.totalCycles for info in layerInfos) > 0 else 0.0)
74+
]
5375

5476
with open(args.output_path, 'w', newline = '') as csvfile:
5577
writer = csv.DictWriter(csvfile, fieldnames = fieldnames)

DeeployTest/testUtils/ProfilingTraceParser.py

Lines changed: 12 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -14,14 +14,15 @@ class LayerProfiling:
1414
bufferingMode: BufferingMode
1515
ops: int
1616
kernelCycles: List[int]
17-
inputDmaCycles: List[int]
18-
outputDmaCycles: List[int]
17+
preKernelCycles: List[int]
18+
postKernelCycles: List[int]
19+
tiles: int = 0
1920

2021

2122
class ProfilingTraceParser:
2223

2324
lineRegex = re.compile(
24-
r"\[(\w+)\]\[(SB|DB)\]\[(\d+) ops\]\[Tile \d+\] (Input DMA|Output DMA|Kernel) took (\d+) cycles\n")
25+
r"\[(\w+)\]\[(SB|DB)\]\[(\d+) ops\]\[Tile \d+\] (Pre-Kernel|Kernel|Post-Kernel)\s*:\s*(\d+) cycles")
2526

2627
def parse(self, trace: str) -> Dict[str, LayerProfiling]:
2728
layerProfilings: Dict[str, LayerProfiling] = {}
@@ -34,16 +35,18 @@ def parse(self, trace: str) -> Dict[str, LayerProfiling]:
3435
bufferingMode = bufferingMode, # type: ignore
3536
ops = int(ops),
3637
kernelCycles = [],
37-
inputDmaCycles = [],
38-
outputDmaCycles = [])
38+
preKernelCycles = [],
39+
postKernelCycles = [])
3940

4041
if measurementName == "Kernel":
4142
layerProfilings[layerName].kernelCycles.append(int(cycles))
42-
elif measurementName == "Input DMA":
43-
layerProfilings[layerName].inputDmaCycles.append(int(cycles))
44-
elif measurementName == "Output DMA":
45-
layerProfilings[layerName].outputDmaCycles.append(int(cycles))
43+
elif measurementName == "Pre-Kernel":
44+
layerProfilings[layerName].preKernelCycles.append(int(cycles))
45+
elif measurementName == "Post-Kernel":
46+
layerProfilings[layerName].postKernelCycles.append(int(cycles))
4647
else:
4748
raise RuntimeError(f"Unsupported measurement name: {measurementName}")
4849

50+
layerProfilings[layerName].tiles = len(layerProfilings[layerName].kernelCycles)
51+
4952
return layerProfilings

0 commit comments

Comments
 (0)