1717import sys
1818import math
1919import argparse
20+ from collections import defaultdict
2021
2122def mad_filter (samples , z_thresh = 3.5 ):
2223 """
@@ -77,9 +78,62 @@ def calculate_stats(samples, insn_count):
7778 "ips" : ips
7879 }
7980
def is_baseline_result(result):
    """Return True when *result* is the NOP baseline measurement.

    A result counts as the baseline when its ``"is_baseline"`` value is
    truthy, or when its ``"name"`` or ``"opcode"`` equals ``"NOP_BASELINE"``.
    """
    return (
        bool(result.get("is_baseline"))
        or result.get("name") == "NOP_BASELINE"
        or result.get("opcode") == "NOP_BASELINE"
    )


def _none_last(value):
    """Return a sort key that orders ``None`` after every real value.

    The first tuple element decides between missing and present values, so
    ``None`` itself is never compared against a real value.
    """
    if value is None:
        return (True, 0)
    return (False, value)


def build_diagnostics(clean_results, baseline_ns_per_du):
    """Build the diagnostics section from per-opcode result dicts.

    Args:
        clean_results: iterable of result dicts. The keys read are
            ``name``, ``opcode``, ``category``, ``iterations``,
            ``ns_per_du``, ``delta_ns``, ``stddev_ns`` and ``is_baseline``;
            every one of them is optional.
        baseline_ns_per_du: baseline value chosen by the caller. Only its
            truthiness is used here.

    Returns:
        A dict with four keys:

        * ``baseline_source``: ``"result"`` when a baseline row is present
          and the baseline value is truthy, else ``"metadata_or_missing"``.
        * ``top_delta_opcodes``: up to 8 non-baseline rows with the largest
          ``abs(delta_ns)``; a missing delta counts as 0.0.
        * ``category_summary``: one entry per category (sorted by category)
          with count, mean, min, max and span of ``ns_per_du``. Rows
          without a category and baseline rows are excluded; categories
          with no ``ns_per_du`` values are omitted.
        * ``opcode_summary``: every row sorted by (category, opcode, name),
          with missing values placed last.
    """
    opcode_rows = []
    category_groups = defaultdict(list)

    for result in clean_results:
        category = result.get("category")
        row = {
            "name": result.get("name", result.get("opcode", "?")),
            "opcode": result.get("opcode", "?"),
            "category": category,
            "iterations": result.get("iterations", 0),
            "ns_per_du": result.get("ns_per_du"),
            "delta_ns": result.get("delta_ns"),
            "stddev_ns": result.get("stddev_ns"),
            # Use the shared predicate so a baseline identified only by its
            # name/opcode ("NOP_BASELINE") is not reported as a regular
            # opcode in the rankings below.
            "is_baseline": is_baseline_result(result),
        }
        opcode_rows.append(row)

        if category is not None and not row["is_baseline"]:
            category_groups[category].append(row)

    non_baseline_rows = [row for row in opcode_rows if not row["is_baseline"]]
    sorted_by_delta = sorted(
        non_baseline_rows,
        # The "delta_ns" key always exists in a row but may hold None, so a
        # .get() default would never apply; coerce None to 0.0 explicitly.
        key=lambda row: abs(row["delta_ns"] or 0.0),
        reverse=True,
    )

    category_summary = []
    for category, rows in sorted(category_groups.items()):
        ns_values = [
            row["ns_per_du"] for row in rows if row["ns_per_du"] is not None
        ]
        if not ns_values:
            continue
        lowest = min(ns_values)
        highest = max(ns_values)
        category_summary.append({
            "category": category,
            "count": len(rows),
            "mean_ns_per_du": sum(ns_values) / len(ns_values),
            "min_ns_per_du": lowest,
            "max_ns_per_du": highest,
            "span_ns_per_du": highest - lowest,
        })

    # A truthy baseline value alone does not prove it was measured: it is
    # only attributed to a result when a baseline row is actually present.
    has_baseline_row = any(row["is_baseline"] for row in opcode_rows)
    from_result = bool(baseline_ns_per_du) and has_baseline_row

    return {
        "baseline_source": "result" if from_result else "metadata_or_missing",
        "top_delta_opcodes": sorted_by_delta[:8],
        "category_summary": category_summary,
        "opcode_summary": sorted(
            opcode_rows,
            # Rows may lack a category (None); a plain tuple key would raise
            # TypeError when None is compared with a real category.
            key=lambda row: (
                _none_last(row["category"]),
                _none_last(row["opcode"]),
                _none_last(row["name"]),
            ),
        ),
    }
133+
80134def analyze (raw_data ):
81135 # 1. Filter and re-calculate
82- baseline_ns_per_du = 0
136+ baseline_ns_per_du = raw_data . get ( "baseline_ns_per_du" , 0 )
83137 clean_results = []
84138
85139 # First pass: find NOP_BASELINE to set baseline_ns_per_du
@@ -90,7 +144,7 @@ def analyze(raw_data):
90144 # update r with new stats
91145 r .update (stats )
92146
93- if r [ "opcode" ] == "NOP_BASELINE" :
147+ if is_baseline_result ( r ) :
94148 baseline_ns_per_du = r ["ns_per_insn" ]
95149
96150 clean_results .append (r )
@@ -105,7 +159,7 @@ def analyze(raw_data):
105159
106160 groups = []
107161 for r in raw_data .get ("results" , []):
108- if r [ "opcode" ] == "NOP_BASELINE" :
162+ if is_baseline_result ( r ) :
109163 continue
110164 # Exclude category 7 (VM Internal) opcodes like CHECK_INTEGRITY,
111165 # as they do not follow standard DU padding and are inherently distinguishable.
@@ -177,6 +231,7 @@ def analyze(raw_data):
177231 "indistinguishable" : bool (p_value > 0.05 ) if p_value != - 1.0 else None ,
178232 "leakage_bits" : float (mi_bits ) if mi_bits != - 1.0 else None
179233 },
234+ "diagnostics" : build_diagnostics (clean_results , baseline_ns_per_du ),
180235 "results" : clean_results
181236 }
182237
0 commit comments