|
1 | | -import time |
2 | 1 | from dataclasses import asdict |
3 | 2 | from pathlib import Path |
4 | 3 | from typing import Optional |
5 | 4 |
|
6 | | -import matplotlib.patches as mpatches |
| 5 | +import matplotlib |
| 6 | +import matplotlib.pyplot as plt |
7 | 7 | import numpy as np |
8 | 8 | import pandas as pd |
9 | | -import pylab |
10 | 9 | import seaborn as sns |
11 | 10 |
|
12 | 11 | from cubed.runtime.types import Callback |
13 | 12 |
|
14 | 13 | sns.set_style("whitegrid") |
15 | | -pylab.switch_backend("Agg") |
| 14 | +matplotlib.use("Agg") |
16 | 15 |
|
17 | 16 |
|
class TimelineVisualizationCallback(Callback):
    """Callback that records task events and saves a timeline plot.

    Every task-end event of a computation is collected; when the computation
    ends the events are plotted with ``generate_timeline`` and the figure is
    written to ``history/<compute_id>/timeline.<format>``.
    """

    def __init__(self, format: Optional[str] = "svg") -> None:
        """Create the callback.

        Args:
            format: File extension used for the saved figure. ``None`` falls
                back to ``"svg"``, because a literal ``timeline.None`` path
                is not a format ``savefig`` can write.
        """
        self.format = "svg" if format is None else format
        # Initialised here as well so the callback is usable even if
        # ``on_compute_start`` is never delivered.
        self.events = []

    def on_compute_start(self, event):
        """Discard events from any previous computation."""
        self.events = []

    def on_task_end(self, event):
        """Record the finished task's event as a plain dict."""
        self.events.append(asdict(event))

    def on_compute_end(self, event):
        """Render the recorded events and save the timeline figure.

        The output path is stored on ``self.dst``. ``event.compute_id`` names
        the directory under ``history/``.
        """
        events_df = pd.DataFrame(self.events)
        fig = generate_timeline(events_df)

        try:
            self.dst = Path(f"history/{event.compute_id}")
            self.dst.mkdir(parents=True, exist_ok=True)
            self.dst = self.dst / f"timeline.{self.format}"

            fig.savefig(self.dst)
        finally:
            # Figures created through pyplot stay registered until closed;
            # without this every computation would leak one figure.
            plt.close(fig)
| 36 | + |
| 37 | + |
def generate_timeline(events_df):
    """Build a scatter-plot timeline of task lifecycle timestamps.

    Tasks are ordered by ``task_create_tstamp`` (ties broken by ``name``) and
    plotted one per row. For each task four points are drawn: task create,
    function start, function end and task result, each expressed in seconds
    relative to the earliest ``task_create_tstamp`` in the frame.

    Args:
        events_df: DataFrame with the columns ``name``,
            ``task_create_tstamp``, ``function_start_tstamp``,
            ``function_end_tstamp`` and ``task_result_tstamp``. An empty
            frame (for example when no task was recorded) is accepted.

    Returns:
        The matplotlib figure. The caller is responsible for saving and
        closing it.
    """
    # (legend label, source column) for each plotted series.
    series = (
        ("task create", "task_create_tstamp"),
        ("function start", "function_start_tstamp"),
        ("function end", "function_end_tstamp"),
        ("task result", "task_result_tstamp"),
    )
    point_size = 7

    # A frame built from zero events has no columns at all, so sorting it
    # would raise KeyError; prepare the data only when there is some.
    has_events = not events_df.empty
    if has_events:
        events_df = events_df.sort_values(
            by=["task_create_tstamp", "name"], ascending=True
        )
        start_tstamp = events_df["task_create_tstamp"].min()
        y = np.arange(len(events_df))

    # Create the figure only after the data has been validated, so a failure
    # above does not leave an orphaned pyplot figure behind.
    fig, ax = plt.subplots(figsize=(10, 8))

    if has_events:
        for field_name, column in series:
            ax.scatter(
                events_df[column] - start_tstamp,
                y,
                label=field_name,
                edgecolor="none",
                s=point_size,
                alpha=0.8,
            )
        ax.legend()

    ax.set_xlabel("Execution time (sec)")
    ax.set_ylabel("Task number")

    return fig
0 commit comments