Skip to content

Commit 0a1ecfd

Browse files
authored
test: Add cuda memory timeline plotting (#558)
1 parent 5ec9ada commit 0a1ecfd

File tree

1 file changed

+59
-21
lines changed

1 file changed

+59
-21
lines changed

tests/profiling/plot_memory_timeline.py

Lines changed: 59 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
from dataclasses import dataclass
99
from pathlib import Path
1010

11+
import matplotlib
12+
13+
matplotlib.use("Agg") # Use non-GUI backend to avoid tkinter dependency
1114
import matplotlib.pyplot as plt
1215
import numpy as np
1316
from paths import TRACES_DIR
@@ -17,17 +20,21 @@
1720
# NOTE(review): the @dataclass decorator sits just above the visible diff hunk;
# it is assumed here because the fields are bare annotations and from_event
# constructs the class with keyword arguments — confirm against the original.
@dataclass
class MemoryFrame:
    """One "[memory]" profiler trace event, reduced to the fields we plot."""

    timestamp: int  # event timestamp ("ts" in the trace event)
    total_allocated: int  # in bytes
    device_type: int  # 0 for CPU, 1 for CUDA
    device_id: int  # -1 for CPU, 0+ for CUDA devices

    @staticmethod
    def from_event(event: dict) -> "MemoryFrame":
        """Build a MemoryFrame from a raw trace-event dict.

        Uses dict.get for the "args" fields, so a field absent from the
        event yields None rather than raising.
        """
        event_args = event["args"]
        return MemoryFrame(
            timestamp=event["ts"],
            total_allocated=event_args.get("Total Allocated"),
            device_type=event_args.get("Device Type"),
            device_id=event_args.get("Device Id"),
        )
2835

2936

30-
def extract_memory_timelines(path: Path) -> tuple[np.ndarray, np.ndarray]:
    """Load a chrome-trace JSON file and return (cpu, cuda) memory timelines.

    Each timeline is an (N, 2) int array of [timestamp, total_allocated_bytes]
    rows, sorted by timestamp. N may be 0 when the trace contains no frames
    for that device; the result is still shaped (0, 2) so callers can slice
    columns without special-casing.
    """
    with open(path) as f:
        data = json.load(f)

    # NOTE(review): this assignment was elided by the diff view; "traceEvents"
    # is the standard chrome-trace top-level key — confirm against the
    # original file.
    events = data["traceEvents"]

    print("Extracting memory frames...")

    frames = [MemoryFrame.from_event(e) for e in events if e["name"] == "[memory]"]

    # Separate CPU (device_type=0) and CUDA (device_type=1) frames.
    cpu_frames = [f for f in frames if f.device_type == 0]
    cuda_frames = [f for f in frames if f.device_type == 1]

    cpu_frames.sort(key=lambda frame: frame.timestamp)
    cuda_frames.sort(key=lambda frame: frame.timestamp)

    print(f"Found {len(cpu_frames)} CPU memory frames and {len(cuda_frames)} CUDA memory frames")

    # reshape(-1, 2) keeps an empty timeline 2-D (shape (0, 2)); a bare
    # np.array([]) would be shape (0,) and break tl[:, 0] / tl[0, 0] in the
    # plotting code.
    cpu_timeline = np.array([[f.timestamp, f.total_allocated] for f in cpu_frames]).reshape(-1, 2)
    cuda_timeline = np.array([[f.timestamp, f.total_allocated] for f in cuda_frames]).reshape(-1, 2)

    return cpu_timeline, cuda_timeline
4760

4861

4962
def plot_memory_timelines(experiment: str, folders: list[str]) -> None:
50-
timelines = list[np.ndarray]()
63+
cpu_timelines = []
64+
cuda_timelines = []
5165
for folder in folders:
5266
path = TRACES_DIR / folder / f"{experiment}.json"
53-
timelines.append(extract_memory_timeline(path))
54-
55-
fig, ax = plt.subplots(figsize=(12, 6))
56-
for folder, timeline in zip(folders, timelines, strict=True):
57-
time = (timeline[:, 0] - timeline[0, 0]) // 1000 # Make time start at 0 and convert to ms.
58-
memory = timeline[:, 1]
59-
ax.plot(time, memory, label=folder, linewidth=1.5)
60-
61-
ax.set_xlabel("Time (ms)", fontsize=12)
62-
ax.set_ylabel("Total Allocated (bytes)", fontsize=12)
63-
ax.set_title(f"Memory Timeline: {experiment}", fontsize=14, fontweight="bold")
64-
ax.legend(loc="best", fontsize=11)
65-
ax.grid(True, alpha=0.3)
66-
ax.set_ylim(bottom=0)
67+
cpu_timeline, cuda_timeline = extract_memory_timelines(path)
68+
cpu_timelines.append(cpu_timeline)
69+
cuda_timelines.append(cuda_timeline)
70+
71+
fig, (ax_cuda, ax_cpu) = plt.subplots(2, 1, figsize=(12, 10), sharex=True)
72+
73+
start_times = [
74+
min(cpu_tl[0, 0], cuda_tl[0, 0]) if len(cuda_tl) > 0 else cpu_tl[0, 0]
75+
for cpu_tl, cuda_tl in zip(cpu_timelines, cuda_timelines, strict=True)
76+
]
77+
78+
# Plot CUDA memory (top subplot)
79+
for folder, cuda_timeline, start_time in zip(folders, cuda_timelines, start_times, strict=True):
80+
if len(cuda_timeline) > 0:
81+
time = (cuda_timeline[:, 0] - start_time) // 1000 # Convert to ms starting at 0
82+
memory = cuda_timeline[:, 1]
83+
ax_cuda.plot(time, memory, label=folder, linewidth=1.5)
84+
85+
ax_cuda.set_xlabel("Time (ms)", fontsize=12)
86+
ax_cuda.set_ylabel("CUDA Memory (bytes)", fontsize=12)
87+
ax_cuda.set_title(f"CUDA Memory Timeline: {experiment}", fontsize=14, fontweight="bold")
88+
ax_cuda.legend(loc="best", fontsize=11)
89+
ax_cuda.grid(True, alpha=0.3)
90+
ax_cuda.set_ylim(bottom=0)
91+
92+
# Plot CPU memory (bottom subplot)
93+
for folder, cpu_timeline, start_time in zip(folders, cpu_timelines, start_times, strict=True):
94+
time = (cpu_timeline[:, 0] - start_time) // 1000 # Convert to ms starting at 0
95+
memory = cpu_timeline[:, 1]
96+
ax_cpu.plot(time, memory, label=folder, linewidth=1.5)
97+
98+
ax_cpu.set_xlabel("Time (ms)", fontsize=12)
99+
ax_cpu.set_ylabel("CPU Memory (bytes)", fontsize=12)
100+
ax_cpu.set_title(f"CPU Memory Timeline: {experiment}", fontsize=14, fontweight="bold")
101+
ax_cpu.legend(loc="best", fontsize=11)
102+
ax_cpu.grid(True, alpha=0.3)
103+
ax_cpu.set_ylim(bottom=0)
104+
67105
fig.tight_layout()
68106

69107
output_dir = Path(TRACES_DIR / "memory_timelines")

0 commit comments

Comments
 (0)