Skip to content

Commit 56e5bcd

Browse files
authored
Merge pull request #465 from matthew-frank/matthew-frank/rcp-jackknife
Adds a --bootstrap histogram to rcp_checker/visualization_scripts/rcp_viewer.py
2 parents a94dfc3 + 4bd6c8c commit 56e5bcd

1 file changed

Lines changed: 49 additions & 2 deletions

File tree

mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import sys
88
import os
99
import argparse
10+
import math
11+
import numpy as np
1012

1113
#Add the project root directory (assumed to be 3 levels up) to sys.path
1214
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../..")))
@@ -16,6 +18,31 @@
1618
def print_rcp_record(record):
    '''Print one RCP record as a "BS,Mean,Min" comma-separated line.

    Reads the 'BS', 'RCP Mean' and 'Min Epochs' entries of record (in that
    order) and writes them to stdout on a single line.
    '''
    batch_size = record['BS']
    mean_epochs = record['RCP Mean']
    min_epochs = record['Min Epochs']
    print(f"{batch_size},{mean_epochs},{min_epochs}")
1820

21+
def bootstrap_scores(samples, num_runs, iterations=10000, rng=None):
    '''Bootstrap submission-sized trimmed-mean scores from the reference runs.

    Each iteration draws num_runs values with replacement from samples, sorts
    them, drops the k = ceil(10% of num_runs) smallest and the k largest
    values, and records the mean of what remains.

    Args:
        samples: sequence of numeric reference-run results (converted to a
            float array). Must not be empty.
        num_runs: number of values drawn per iteration.
        iterations: number of bootstrap iterations, i.e. the length of the
            returned array.
        rng: optional numpy.random.Generator. When None, a fresh unseeded
            np.random.default_rng() is created.

    Returns:
        A 1-D float numpy array of length iterations with one trimmed mean
        per iteration.

    Exits the program (sys.exit with an "Error: ..." message) when trimming
    would leave nothing to average, or when samples is empty.
    '''
    rng = rng if rng is not None else np.random.default_rng()
    arr = np.asarray(samples, dtype=float)
    k = math.ceil(0.10 * num_runs)
    if num_runs - 2 * k <= 0:
        sys.exit(f"Error: trimming {k} from each end of {num_runs} runs leaves no samples")
    # Without this guard rng.choice() fails on an empty array with an opaque
    # NumPy ValueError; report it in the same style as the check above.
    if arr.size == 0:
        sys.exit("Error: no reference samples to bootstrap from")
    scores = np.empty(iterations)
    # One rng.choice() call per iteration is kept deliberately: batching the
    # draws could change the sequence produced for a given seed.
    for i in range(iterations):
        draw = np.sort(rng.choice(arr, size=num_runs, replace=True))
        scores[i] = draw[k:num_runs - k].mean()
    return scores
37+
38+
def print_histogram(scores, bar_width=50):
    '''Render scores as a text histogram, one printed line per bin.

    Bins come from np.histogram(scores, bins='auto'). Every line has the form
    "<low>-<high> | <bar> (<count>)": edges are shown with one decimal place
    and the '#' bar is scaled so the fullest bin is bar_width characters long.
    '''
    counts, edges = np.histogram(scores, bins='auto')
    tallest = counts.max() if len(counts) else 0
    # Pair each bin's lower edge, upper edge and count.
    for low, high, count in zip(edges[:-1], edges[1:], counts):
        if tallest:
            length = round(bar_width * count / tallest)
        else:
            # No bin holds any score, so every bar is empty.
            length = 0
        bar = '#' * length
        print(f"{low:.1f}-{high:.1f} | {bar} ({count})")
45+
1946
# this should be a method of rcp_checker.RCP_Checker, but it's missing.
2047
# Instead we derived it from _find_min_rcp()
2148
def find_max_rcp(checker, rcp_pass_arg='pruned_rcps'):
@@ -68,7 +95,12 @@ def main():
6895
help='specify an RCP json file to use')
6996
parser.add_argument('--interpolate', action='store_true',
7097
help='generate interpolated rcp min/mean for all batch sizes')
71-
98+
parser.add_argument('--bootstrap', type=int, metavar='GBS',
99+
help='print a histogram of bootstrapped, submission-sized trimmed-mean '
100+
'scores for the real (non-interpolated) RCP at the given global batch size (GBS)')
101+
parser.add_argument('--seed', type=int, default=None,
102+
help='seed the RNG for reproducible --bootstrap output')
103+
72104

73105
args = parser.parse_args()
74106
rcp_pass_arg='pruned_rcps'
@@ -80,7 +112,22 @@ def main():
80112
if not args.no_header:
81113
print("BS,Mean,Min")
82114

83-
if not args.interpolate:
115+
if args.bootstrap is not None:
116+
record = checker._find_rcp(args.bootstrap, 'full_rcps')
117+
if record is None:
118+
sys.exit(f"Error: GBS {args.bootstrap} is not a measured "
119+
f"(non-interpolated) RCP batch size for {args.benchmark}")
120+
print_rcp_record(record)
121+
print(f"submission_runs: {checker.submission_runs}")
122+
max_speedup = record['RCP Mean'] / record['Min Epochs']
123+
print(f"max_speedup (mean/min): {max_speedup}")
124+
scores = bootstrap_scores(record['Epochs to converge'],
125+
checker.submission_runs,
126+
rng=np.random.default_rng(args.seed))
127+
prob_below_min = np.mean(scores < record['Min Epochs'])
128+
print(f"P(score < min): {prob_below_min}")
129+
print_histogram(scores)
130+
elif not args.interpolate:
84131
data=checker._get_rcp_data(rcp_pass_arg)
85132
for key, record in data.items():
86133
print_rcp_record(record)

0 commit comments

Comments
 (0)