77import sys
88import os
99import argparse
10+ import math
11+ import numpy as np
1012
# Make the project root importable: it is assumed to sit three directory
# levels above this file, and its absolute path is appended to sys.path.
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
)
def print_rcp_record(record):
    '''Print one RCP record to stdout as a single comma-separated row.

    record is indexed with the keys 'BS', 'RCP Mean' and 'Min Epochs'; the
    three values are written in that order.
    '''
    # No padding around the commas, so each row has the same shape as the
    # "BS,Mean,Min" header and stays trivially machine-parseable.
    print(f"{record['BS']},{record['RCP Mean']},{record['Min Epochs']}")
1820
def bootstrap_scores(samples, num_runs, iterations=10000, rng=None):
    '''Bootstrap submission-sized trimmed-mean scores from the reference runs.

    Each iteration draws num_runs values with replacement from samples, drops
    the k = ceil(10% of num_runs) smallest and the k largest draws, and takes
    the mean of what is left.

    Args:
        samples: sequence of numeric values to resample from; it is converted
            to a float array.
        num_runs: number of values drawn per resample.
        iterations: number of resamples, i.e. the length of the result.
        rng: optional numpy.random.Generator; a fresh default_rng() is used
            when None.

    Returns:
        A float array of length iterations holding one trimmed mean per
        resample.

    Exits through sys.exit (SystemExit) with an error message when samples is
    empty or when trimming k from each end would leave nothing to average.
    '''
    if rng is None:
        rng = np.random.default_rng()
    arr = np.asarray(samples, dtype=float)
    if arr.size == 0:
        # Without this guard rng.choice() fails later with an opaque
        # ValueError about an empty population.
        sys.exit("Error: no samples to bootstrap from")

    k = math.ceil(0.10 * num_runs)
    if num_runs - 2 * k <= 0:
        sys.exit(f"Error: trimming {k} from each end of {num_runs} runs leaves no samples")

    scores = np.empty(iterations)
    for i in range(iterations):
        # Sort the resample so the slice below removes the k lowest and the
        # k highest values before averaging.
        draw = np.sort(rng.choice(arr, size=num_runs, replace=True))
        scores[i] = draw[k:num_runs - k].mean()
    return scores
37+
def print_histogram(scores, bar_width=50):
    '''Print an ASCII text-bar histogram of scores using numpy auto-binning.

    One line is printed per bin: the bin's lower and upper edge (one decimal
    place each), a bar of '#' characters scaled so that the fullest bin is
    bar_width characters wide, and the bin's count in parentheses.
    '''
    counts, edges = np.histogram(scores, bins='auto')
    # A zero maximum (no data in any bin) would divide by zero in the scaling
    # below, so it is handled by printing empty bars instead.
    max_count = int(counts.max()) if len(counts) else 0
    for count, lo, hi in zip(counts, edges[:-1], edges[1:]):
        count = int(count)
        # int() keeps the repeat factor a plain Python integer no matter what
        # round() hands back for its operand.
        bar_len = int(round(bar_width * count / max_count)) if max_count else 0
        print(f"{lo:.1f}-{hi:.1f} | {'#' * bar_len} ({count})")
45+
# This should be a method of rcp_checker.RCP_Checker, but it is missing there,
# so it is derived here from _find_min_rcp().
2148def find_max_rcp (checker , rcp_pass_arg = 'pruned_rcps' ):
@@ -68,7 +95,12 @@ def main():
6895 help = 'specify an RCP json file to use' )
6996 parser .add_argument ('--interpolate' , action = 'store_true' ,
7097 help = 'generate interpolated rcp min/mean for all batch sizes' )
71-
98+ parser .add_argument ('--bootstrap' , type = int , metavar = 'GBS' ,
99+ help = 'print a histogram of bootstrapped, submission-sized trimmed-mean '
100+ 'scores for the real (non-interpolated) RCP at the given global batch size (GBS)' )
101+ parser .add_argument ('--seed' , type = int , default = None ,
102+ help = 'seed the RNG for reproducible --bootstrap output' )
103+
72104
73105 args = parser .parse_args ()
74106 rcp_pass_arg = 'pruned_rcps'
@@ -80,7 +112,22 @@ def main():
80112 if not args .no_header :
81113 print ("BS,Mean,Min" )
82114
83- if not args .interpolate :
115+ if args .bootstrap is not None :
116+ record = checker ._find_rcp (args .bootstrap , 'full_rcps' )
117+ if record is None :
118+ sys .exit (f"Error: GBS { args .bootstrap } is not a measured "
119+ f"(non-interpolated) RCP batch size for { args .benchmark } " )
120+ print_rcp_record (record )
121+ print (f"submission_runs: { checker .submission_runs } " )
122+ max_speedup = record ['RCP Mean' ] / record ['Min Epochs' ]
123+ print (f"max_speedup (mean/min): { max_speedup } " )
124+ scores = bootstrap_scores (record ['Epochs to converge' ],
125+ checker .submission_runs ,
126+ rng = np .random .default_rng (args .seed ))
127+ prob_below_min = np .mean (scores < record ['Min Epochs' ])
128+ print (f"P(score < min): { prob_below_min } " )
129+ print_histogram (scores )
130+ elif not args .interpolate :
84131 data = checker ._get_rcp_data (rcp_pass_arg )
85132 for key , record in data .items ():
86133 print_rcp_record (record )
0 commit comments