88from dataclasses import dataclass
99from pathlib import Path
1010
11+ import matplotlib
12+
13+ matplotlib .use ("Agg" ) # Use non-GUI backend to avoid tkinter dependency
1114import matplotlib .pyplot as plt
1215import numpy as np
1316from paths import TRACES_DIR
1720class MemoryFrame :
1821 timestamp : int
1922 total_allocated : int # in bytes
23+ device_type : int # 0 for CPU, 1 for CUDA
24+ device_id : int # -1 for CPU, 0+ for CUDA devices
2025
2126 @staticmethod
2227 def from_event (event : dict ):
2328 args = event ["args" ]
2429 return MemoryFrame (
2530 timestamp = event ["ts" ],
2631 total_allocated = args .get ("Total Allocated" ),
32+ device_type = args .get ("Device Type" ),
33+ device_id = args .get ("Device Id" ),
2734 )
2835
2936
30- def extract_memory_timeline (path : Path ) -> np .ndarray :
def extract_memory_timelines(path: Path) -> tuple[np.ndarray, np.ndarray]:
    """Read a trace JSON file and return (cpu_timeline, cuda_timeline).

    Each timeline is an array of [timestamp, total_allocated] rows sorted by
    timestamp (an empty 1-D array when no frames of that device type exist —
    callers must guard with len() before 2-D indexing).
    """
    with open(path) as f:
        data = json.load(f)

    # NOTE(review): this assignment was hidden inside the diff hunk header;
    # "traceEvents" is the standard Chrome-trace key — confirm against the
    # original file.
    events = data["traceEvents"]

    print("Extracting memory frames...")

    frames = [MemoryFrame.from_event(e) for e in events if e["name"] == "[memory]"]

    # Bucket frames by device: 0 = CPU, 1 = CUDA; any other device_type
    # (including None) is dropped, matching the original filtering.
    buckets: dict[int, list[MemoryFrame]] = {0: [], 1: []}
    for frame in frames:
        if frame.device_type in buckets:
            buckets[frame.device_type].append(frame)
    cpu_frames, cuda_frames = buckets[0], buckets[1]

    for bucket in (cpu_frames, cuda_frames):
        bucket.sort(key=lambda fr: fr.timestamp)

    print(f"Found {len(cpu_frames)} CPU memory frames and {len(cuda_frames)} CUDA memory frames")

    def as_timeline(device_frames: list[MemoryFrame]) -> np.ndarray:
        # One [timestamp, total_allocated] row per frame.
        return np.array([[fr.timestamp, fr.total_allocated] for fr in device_frames])

    return as_timeline(cpu_frames), as_timeline(cuda_frames)
4760
4861
4962def plot_memory_timelines (experiment : str , folders : list [str ]) -> None :
50- timelines = list [np .ndarray ]()
63+ cpu_timelines = []
64+ cuda_timelines = []
5165 for folder in folders :
5266 path = TRACES_DIR / folder / f"{ experiment } .json"
53- timelines .append (extract_memory_timeline (path ))
54-
55- fig , ax = plt .subplots (figsize = (12 , 6 ))
56- for folder , timeline in zip (folders , timelines , strict = True ):
57- time = (timeline [:, 0 ] - timeline [0 , 0 ]) // 1000 # Make time start at 0 and convert to ms.
58- memory = timeline [:, 1 ]
59- ax .plot (time , memory , label = folder , linewidth = 1.5 )
60-
61- ax .set_xlabel ("Time (ms)" , fontsize = 12 )
62- ax .set_ylabel ("Total Allocated (bytes)" , fontsize = 12 )
63- ax .set_title (f"Memory Timeline: { experiment } " , fontsize = 14 , fontweight = "bold" )
64- ax .legend (loc = "best" , fontsize = 11 )
65- ax .grid (True , alpha = 0.3 )
66- ax .set_ylim (bottom = 0 )
67+ cpu_timeline , cuda_timeline = extract_memory_timelines (path )
68+ cpu_timelines .append (cpu_timeline )
69+ cuda_timelines .append (cuda_timeline )
70+
71+ fig , (ax_cuda , ax_cpu ) = plt .subplots (2 , 1 , figsize = (12 , 10 ), sharex = True )
72+
73+ start_times = [
74+ min (cpu_tl [0 , 0 ], cuda_tl [0 , 0 ]) if len (cuda_tl ) > 0 else cpu_tl [0 , 0 ]
75+ for cpu_tl , cuda_tl in zip (cpu_timelines , cuda_timelines , strict = True )
76+ ]
77+
78+ # Plot CUDA memory (top subplot)
79+ for folder , cuda_timeline , start_time in zip (folders , cuda_timelines , start_times , strict = True ):
80+ if len (cuda_timeline ) > 0 :
81+ time = (cuda_timeline [:, 0 ] - start_time ) // 1000 # Convert to ms starting at 0
82+ memory = cuda_timeline [:, 1 ]
83+ ax_cuda .plot (time , memory , label = folder , linewidth = 1.5 )
84+
85+ ax_cuda .set_xlabel ("Time (ms)" , fontsize = 12 )
86+ ax_cuda .set_ylabel ("CUDA Memory (bytes)" , fontsize = 12 )
87+ ax_cuda .set_title (f"CUDA Memory Timeline: { experiment } " , fontsize = 14 , fontweight = "bold" )
88+ ax_cuda .legend (loc = "best" , fontsize = 11 )
89+ ax_cuda .grid (True , alpha = 0.3 )
90+ ax_cuda .set_ylim (bottom = 0 )
91+
92+ # Plot CPU memory (bottom subplot)
93+ for folder , cpu_timeline , start_time in zip (folders , cpu_timelines , start_times , strict = True ):
94+ time = (cpu_timeline [:, 0 ] - start_time ) // 1000 # Convert to ms starting at 0
95+ memory = cpu_timeline [:, 1 ]
96+ ax_cpu .plot (time , memory , label = folder , linewidth = 1.5 )
97+
98+ ax_cpu .set_xlabel ("Time (ms)" , fontsize = 12 )
99+ ax_cpu .set_ylabel ("CPU Memory (bytes)" , fontsize = 12 )
100+ ax_cpu .set_title (f"CPU Memory Timeline: { experiment } " , fontsize = 14 , fontweight = "bold" )
101+ ax_cpu .legend (loc = "best" , fontsize = 11 )
102+ ax_cpu .grid (True , alpha = 0.3 )
103+ ax_cpu .set_ylim (bottom = 0 )
104+
67105 fig .tight_layout ()
68106
69107 output_dir = Path (TRACES_DIR / "memory_timelines" )
0 commit comments