77import os
88import re
99import sys
10+ import traceback
1011import itertools
1112import pandas as pd
1213import yaml
14+ import numpy as np
1315import hashlib
1416import math
1517import operator
1618import uuid as uuidlib
19+ import copy
1720
1821from ..compliance_checker import mlp_compliance
1922from ..compliance_checker .mlp_compliance import usage_choices , rule_choices
2023from ..compliance_checker .mlp_parser import parse_file
2124
25+ from ..rcp_checker import rcp_checker
2226from ..benchmark_meta import get_allowed_benchmarks , get_result_file_counts
2327
2428
@@ -263,12 +267,23 @@ def _get_weak_scaling_metric_schema():
263267 }
264268
265269
266- def _get_empty_summary (usage , ruleset , weak_scaling = False ):
270+ def _get_strong_scaling_metric_schema ():
271+ return {
272+ 'time_to_train' : float ,
273+ 'Energy' : float ,
274+ 'GBS' : float ,
275+ 'epochs' : float ,
276+ 'RCP' : str ,
277+ 'rcp_scaling_factor' : float ,
278+ }
279+
280+
def _get_empty_summary(usage, ruleset, weak_scaling=False, detailed=False):
    """Create an empty ``Summary`` whose columns follow the column schema.

    Args:
        usage: Usage name forwarded to ``_get_column_schema``.
        ruleset: Ruleset name forwarded to ``_get_column_schema``.
        weak_scaling: Forwarded to ``_get_column_schema`` to select the
            weak-scaling column layout.
        detailed: Forwarded to ``_get_column_schema`` to select the
            detailed strong-scaling column layout.

    Returns:
        A ``Summary`` constructed from the schema's column names.
    """
    column_schema = _get_column_schema(
        usage, ruleset, weak_scaling=weak_scaling, detailed=detailed)
    return Summary(column_schema.keys())
269284
270285
271- def _get_column_schema (usage , ruleset , weak_scaling = False ):
286+ def _get_column_schema (usage , ruleset , weak_scaling = False , detailed = False ):
272287 schema = {
273288 'division' : str ,
274289 'availability' : str ,
@@ -289,9 +304,17 @@ def _get_column_schema(usage, ruleset, weak_scaling=False):
289304 for metric , dtype in _get_weak_scaling_metric_schema ().items ():
290305 schema ['{}:{}' .format (benchmark , metric )] = dtype
291306 else :
292- schema .update (
293- {b : float
294- for b in get_allowed_benchmarks (usage , ruleset )})
307+ if detailed :
308+ benchmarks = get_allowed_benchmarks (usage , ruleset )
309+ for benchmark in benchmarks :
310+ for metric , dtype in _get_strong_scaling_metric_schema ().items ():
311+ schema ['{}:{}' .format (benchmark , metric )] = dtype
312+ else :
313+ schema .update (
314+ {
315+ b : float for b in get_allowed_benchmarks (usage , ruleset )
316+ }
317+ )
295318 schema .update ({'details_url' : str , 'code_url' : str })
296319 return schema
297320
@@ -404,8 +427,8 @@ def _compute_strong_score_standalone(
404427 power_score = olympic_avg
405428 power_score *= scaling_factor
406429 if return_full_scores :
407- return scores_track , power_scores_track , score , power_score
408- return score , power_score
430+ return scores_track , power_scores_track , score , power_score , scaling_factor
431+ return score , power_score , scaling_factor
409432
410433
411434def _compute_weak_score_standalone (benchmark , system , has_power , benchmark_folder , usage , ruleset , desc = {"submitter" : None }):
@@ -474,31 +497,106 @@ def _compute_weak_score_standalone(benchmark, system, has_power, benchmark_folde
474497
475498
476499
def _rcp_status_label(rcp_pass, rcp_msg):
    """Translate an RCP check outcome into the label stored in the ``RCP`` column."""
    if rcp_pass:
        return 'Pass'
    if rcp_msg == 'RCP found':
        return 'Fail'
    if rcp_msg == 'RCP Interpolation':
        return 'Interp. Fail'
    if 'Missing' in rcp_msg:
        return 'Missing'
    if rcp_msg == 'Cannot find any RCPs':
        return 'No RCP'
    return 'Unknown state'


def _compute_strong_scaling_scores(desc, system_folder, usage, ruleset, division, rcp_bypass=False):
    """Collect strong-scaling scores for every benchmark folder of a system.

    Args:
        desc: System description mapping; ``desc['submitter']`` is used in
            error messages and the mapping is forwarded to
            ``_compute_strong_score_standalone``.
        system_folder: Folder holding the system's results.  For
            ``usage == 'hpc'`` the benchmarks are read from its ``strong``
            sub-folder.
        usage: Usage name.
        ruleset: Ruleset name.
        division: Submission division.  The RCP check and the GBS/epochs
            extraction only run when this equals ``'closed'``.
        rcp_bypass: Forwarded to ``rcp_checker.check_directory``.

    Returns:
        A ``(benchmark_scores, detailed_benchmark_scores)`` tuple.  The
        first maps each benchmark name to a float; the second maps
        ``'{benchmark}:{metric}'`` keys to the metrics listed by
        ``_get_strong_scaling_metric_schema``.  Both are padded by
        ``_fill_empty_benchmark_scores``.  When the benchmark parent folder
        does not exist, two empty dicts are returned.
    """
    benchmark_scores = {}
    detailed_benchmark_scores = {}
    benchmark_folder_parent = os.path.join(
        system_folder, 'strong') if usage == 'hpc' else system_folder
    if not os.path.isdir(benchmark_folder_parent):
        return benchmark_scores, {}

    for benchmark_folder in _get_sub_folders(benchmark_folder_parent):
        folder_parts = benchmark_folder.split('/')
        # Check if this benchmark has power results.
        has_power = _has_power(benchmark_folder)
        benchmark = _benchmark_alias(folder_parts[-1])
        system = folder_parts[-3] if usage == 'hpc' else folder_parts[-2]

        # Compute base perf/power scores.
        score, power_score, rcp_scaling_factor = _compute_strong_score_standalone(
            benchmark, system, has_power, benchmark_folder, usage, ruleset, desc
        )

        # RCP/GBS/epochs additions, closed division only.
        benchmark_gbs = None
        benchmark_epochs = None
        benchmark_rcp = None
        if division == 'closed':
            pattern = '{folder}/result_*.txt'.format(folder=benchmark_folder)
            result_files = glob.glob(pattern, recursive=True)
            try:
                # RCP check.
                verbose = False
                bert_train_samples = False
                rcp_pass, rcp_msg, _ = rcp_checker.check_directory(
                    benchmark_folder,
                    usage,
                    ruleset,
                    verbose,
                    bert_train_samples,
                    rcp_file=None,
                    rcp_pass='pruned_rcps',
                    rcp_bypass=rcp_bypass,
                    set_scaling=True,
                )
                if not rcp_pass:
                    print(
                        'ERROR: RCP Test Failed on {}/{}/{} with message: {}.'.format(
                            desc['submitter'], system, benchmark, rcp_msg
                        )
                    )
                benchmark_rcp = _rcp_status_label(rcp_pass, rcp_msg)

                # GBS and epochs.
                benchmark_gbs, subm_epochs, _ = rcp_checker.get_submission_epochs(
                    result_files, ruleset, bert_train_samples=False
                )
                subm_epochs.sort()
                # Drop the lowest and highest sample before averaging; with
                # fewer than three samples no epoch value is reported.
                samples_rejected = 1
                if len(subm_epochs) >= 2 * samples_rejected + 1:
                    benchmark_epochs = float(
                        np.mean(
                            subm_epochs[
                                samples_rejected:len(subm_epochs) - samples_rejected
                            ]
                        )
                    )
            except Exception as e:
                # Best effort: a failure here must not abort the whole
                # summary, so report it and keep whatever was collected.
                print(
                    f"WARNING: RCP/GBS computation failed for {benchmark_folder}: {e}"
                )
                traceback.print_exc()

        # Map into metric-suffixed keys matching the detailed schema.
        detailed_benchmark_scores[f"{benchmark}:rcp_scaling_factor"] = float(
            rcp_scaling_factor
        )
        if score is not None:
            detailed_benchmark_scores[f"{benchmark}:time_to_train"] = score
        if benchmark_gbs is not None:
            detailed_benchmark_scores[f"{benchmark}:GBS"] = float(benchmark_gbs)
        if benchmark_epochs is not None:
            # The schema column is named 'epochs'; using any other suffix
            # leaves that column empty and emits an unknown key.
            detailed_benchmark_scores[f"{benchmark}:epochs"] = float(benchmark_epochs)
        if benchmark_rcp is not None:
            detailed_benchmark_scores[f"{benchmark}:RCP"] = benchmark_rcp
        if power_score is not None:
            detailed_benchmark_scores[f"{benchmark}:Energy"] = power_score
        # NOTE(review): the plain per-benchmark value is the RCP scaling
        # factor, not ``score`` -- confirm this is intended, since the
        # scaling factor is already exported in the detailed scores.
        benchmark_scores[benchmark] = float(rcp_scaling_factor)

    _fill_empty_benchmark_scores(benchmark_scores, usage, ruleset, detailed=False)
    _fill_empty_benchmark_scores(detailed_benchmark_scores, usage, ruleset, detailed=True)
    return benchmark_scores, detailed_benchmark_scores
502600
503601
504602def _compute_weak_scaling_scores (desc , system_folder , usage , ruleset ):
@@ -693,6 +791,7 @@ def _fill_empty_benchmark_scores(
693791 usage ,
694792 ruleset ,
695793 weak_scaling = False ,
794+ detailed = False ,
696795):
697796 for benchmark in get_allowed_benchmarks (usage , ruleset ):
698797 if weak_scaling :
@@ -702,8 +801,19 @@ def _fill_empty_benchmark_scores(
702801 benchmark_scores [k ] = None
703802
704803 else :
705- if benchmark not in benchmark_scores :
706- benchmark_scores [benchmark ] = None
804+ if detailed :
805+ strong_schema = _get_strong_scaling_metric_schema ()
806+ for metric , dtype in strong_schema .items ():
807+ k = '{}:{}' .format (benchmark , metric )
808+ if dtype is str :
809+ if k not in benchmark_scores or benchmark_scores [k ] is None :
810+ benchmark_scores [k ] = ''
811+ else :
812+ if k not in benchmark_scores :
813+ benchmark_scores [k ] = None
814+ else :
815+ if benchmark not in benchmark_scores :
816+ benchmark_scores [benchmark ] = None
707817
708818
709819def _get_id_from_sysinfo (summary ):
@@ -841,7 +951,7 @@ def summarize_results(folder, usage, ruleset, csv_file=None, **kwargs):
841951 weak_scaling_summary = _get_empty_summary (usage ,
842952 ruleset ,
843953 weak_scaling = True )
844- power_summary = _get_empty_summary (usage , ruleset )
954+ detailed_strong_scaling_summary = _get_empty_summary (usage , ruleset , detailed = True )
845955 power_weak_scaling_summary = _get_empty_summary (usage , ruleset , weak_scaling = True )
846956 for system_folder in _get_sub_folders (results_folder ):
847957 folder_parts = system_folder .split ('/' )
@@ -924,8 +1034,8 @@ def _check_and_update_system_specs(desc_keys, column_name, query=None):
9241034 continue
9251035
9261036 # Compute the scores.
927- strong_scaling_scores , power_scores = _compute_strong_scaling_scores (
928- desc , system_folder , usage , ruleset )
1037+ strong_scaling_scores , detailed_strong_scaling_scores = _compute_strong_scaling_scores (
1038+ desc , system_folder , usage , ruleset , system_specs [ "division" ], rcp_bypass = False )
9291039 if usage == 'hpc' :
9301040 weak_scaling_scores , power_scores_weak_scaling = _compute_weak_scaling_scores (
9311041 desc , system_folder , usage , ruleset )
@@ -950,17 +1060,18 @@ def _check_and_update_system_specs(desc_keys, column_name, query=None):
9501060 urls .items (),
9511061 ):
9521062 weak_scaling_summary .push (column_name , value )
953- if len (power_scores ) > 0 :
1063+ if len (detailed_strong_scaling_scores ) > 0 :
9541064 for column_name , value in itertools .chain (
9551065 system_specs .items (),
956- power_scores .items (),
1066+ detailed_strong_scaling_scores .items (),
9571067 urls .items (),
9581068 ):
959- power_summary .push (column_name , value )
960- if column_name in strong_scaling_scores :
961- power_summary .push (column_name , strong_scaling_scores [column_name ])
962- else :
963- power_summary .push (column_name , value )
1069+ merged = (
1070+ detailed_strong_scaling_scores [column_name ]
1071+ if column_name in detailed_strong_scaling_scores
1072+ else value
1073+ )
1074+ detailed_strong_scaling_summary .push (column_name , merged )
9641075 if usage == 'hpc' and len (power_scores_weak_scaling ) > 0 :
9651076 for column_name , value in itertools .chain (
9661077 system_specs .items (),
@@ -975,13 +1086,13 @@ def _check_and_update_system_specs(desc_keys, column_name, query=None):
9751086 if len (weak_scaling_summary ) > 0 :
9761087 weak_scaling_summary = weak_scaling_summary .to_dataframe ().sort_values (
9771088 _get_sort_by_column_names ()).reset_index (drop = True )
978- if len (power_summary ) > 0 :
979- power_summary = power_summary .to_dataframe ().sort_values (
1089+ if len (detailed_strong_scaling_summary ) > 0 :
1090+ detailed_strong_scaling_summary = detailed_strong_scaling_summary .to_dataframe ().sort_values (
9801091 _get_sort_by_column_names ()).reset_index (drop = True )
9811092 if len (power_weak_scaling_summary ) > 0 :
9821093 power_weak_scaling_summary = power_weak_scaling_summary .to_dataframe ().sort_values (
9831094 _get_sort_by_column_names ()).reset_index (drop = True )
984- return strong_scaling_summary , weak_scaling_summary , power_summary , power_weak_scaling_summary
1095+ return strong_scaling_summary , weak_scaling_summary , detailed_strong_scaling_summary , power_weak_scaling_summary
9851096
9861097
9871098
@@ -1039,23 +1150,23 @@ def main():
10391150
10401151 strong_scaling_summaries = []
10411152 weak_scaling_summaries = []
1042- power_summaries = []
1153+ detailed_strong_scaling_summaries = []
10431154 power_weak_scaling_summaries = []
10441155
10451156 def _update_summaries (folder ):
10461157 if args .usage == "Training" :
10471158 config_path = os .path .join (os .path .dirname (__file__ ), "config.yaml" )
10481159 with open (config_path , "r" ) as f :
10491160 config = yaml .safe_load (f )
1050- strong_scaling_summary , weak_scaling_summary , power_summary , power_weak_scaling_summary = summarize_results (
1161+ strong_scaling_summary , weak_scaling_summary , detailed_strong_scaling_summary , power_weak_scaling_summary = summarize_results (
10511162 folder ,
10521163 args .usage ,
10531164 args .ruleset ,
10541165 availability = config ["availability" ],
10551166 generate_private_ids = args .generate_private_ids ,
10561167 )
10571168 else :
1058- strong_scaling_summary , weak_scaling_summary , power_summary , power_weak_scaling_summary = summarize_results (
1169+ strong_scaling_summary , weak_scaling_summary , detailed_strong_scaling_summary , power_weak_scaling_summary = summarize_results (
10591170 folder ,
10601171 args .usage ,
10611172 args .ruleset ,
@@ -1064,8 +1175,8 @@ def _update_summaries(folder):
10641175 strong_scaling_summaries .append (strong_scaling_summary )
10651176 if len (weak_scaling_summary ) > 0 :
10661177 weak_scaling_summaries .append (weak_scaling_summary )
1067- if len (power_summary ) > 0 :
1068- power_summaries .append (power_summary )
1178+ if len (detailed_strong_scaling_summary ) > 0 :
1179+ detailed_strong_scaling_summaries .append (detailed_strong_scaling_summary )
10691180 if len (power_weak_scaling_summary ) > 0 :
10701181 power_weak_scaling_summaries .append (power_weak_scaling_summary )
10711182
@@ -1180,13 +1291,14 @@ def _summaries_to_xlsx(summaries: pd.DataFrame, path, version):
11801291
11811292 writer .save ()
11821293 # Print and write back results.
1183- def _print_and_write (summaries , weak_scaling = False , mode = 'w' , power = False ):
1294+ def _print_and_write (summaries , weak_scaling = False , mode = 'w' , power = False , detailed = False ):
11841295 if len (summaries ) > 0 :
11851296 summaries = pd .concat (summaries ).astype (
11861297 _get_column_schema (
11871298 args .usage ,
11881299 args .ruleset ,
11891300 weak_scaling = weak_scaling ,
1301+ detailed = detailed
11901302 )
11911303 )
11921304 if weak_scaling :
@@ -1208,6 +1320,9 @@ def _print_and_write(summaries, weak_scaling=False, mode='w', power = False):
12081320 specs_and_notes = [c for c in summaries .columns if c not in benchmarks ]
12091321 csv = csv .replace (".csv" , "_power.csv" )
12101322 summaries .groupby (specs_and_notes ).apply (lambda x : agg_columns_fn (x , benchmarks )).to_csv (csv , mode = mode )
1323+ elif detailed :
1324+ csv = csv .replace (".csv" , "_detailed.csv" )
1325+ summaries .to_csv (csv , index = False , mode = mode )
12111326 else :
12121327 summaries .to_csv (csv , index = False , mode = mode )
12131328 json_path = "summary.json" if args .csv is None else f"""{ csv .replace (".csv" , ".json" )} """
@@ -1224,7 +1339,7 @@ def _print_and_write(summaries, weak_scaling=False, mode='w', power = False):
12241339 None , 'display.max_colwidth' , None ):
12251340 _print_and_write (strong_scaling_summaries )
12261341 _print_and_write (weak_scaling_summaries , weak_scaling = True , mode = 'a' )
1227- _print_and_write (power_summaries , mode = 'a' , power = True )
1342+ _print_and_write (detailed_strong_scaling_summaries , mode = 'a' , detailed = True )
12281343 _print_and_write (power_weak_scaling_summaries , weak_scaling = True , mode = 'a' , power = True )
12291344
12301345
0 commit comments