@@ -84,19 +84,23 @@ def benchmark_model(
8484 times .append (time .perf_counter () - start )
8585
8686 avg = float (np .mean (times ))
87- std = float (np .std (times ))
87+ med = float (np .median (times ))
88+ q25 = float (np .percentile (times , 25 ))
89+ q75 = float (np .percentile (times , 75 ))
8890 nvars = int (m .nvars )
8991 ncons = int (m .ncons )
9092 print (
9193 f" { label :55s} ({ nvars :>9,} vars, { ncons :>9,} cons): "
92- f"{ avg * 1000 :7.1f} ms ± { std * 1000 :5 .1f} ms "
94+ f"{ med * 1000 :7.1f} ms (IQR { q25 * 1000 :.1f} – { q75 * 1000 :.1f } ms) "
9395 )
9496 return {
9597 "label" : label ,
9698 "nvars" : nvars ,
9799 "ncons" : ncons ,
98100 "mean_s" : avg ,
99- "std_s" : std ,
101+ "median_s" : med ,
102+ "q25_s" : q25 ,
103+ "q75_s" : q75 ,
100104 "times_s" : times ,
101105 }
102106
@@ -136,9 +140,9 @@ def run_benchmarks(
136140 1500 ,
137141 2000 ,
138142 ]:
139- r = benchmark_model (
140- f"basic N= { n } " , basic_model ( n ), iterations , io_api = io_api
141- )
143+ # More iterations for small models to reduce noise
144+ iters = iterations * 5 if n <= 100 else iterations
145+ r = benchmark_model ( f"basic N= { n } " , basic_model ( n ), iters , io_api = io_api )
142146 r ["model" ] = "basic"
143147 r ["param" ] = n
144148 results .append (r )
@@ -175,56 +179,85 @@ def plot_comparison(file_old: str, file_new: str) -> None:
175179 label_old = data_old .get ("label" , Path (file_old ).stem )
176180 label_new = data_new .get ("label" , Path (file_new ).stem )
177181
178- nv_old = [r ["nvars" ] for r in data_old ["results" ]]
179- ms_old = [r ["mean_s" ] * 1000 for r in data_old ["results" ]]
180- std_old = [r ["std_s" ] * 1000 for r in data_old ["results" ]]
181- nv_new = [r ["nvars" ] for r in data_new ["results" ]]
182- ms_new = [r ["mean_s" ] * 1000 for r in data_new ["results" ]]
183- std_new = [r ["std_s" ] * 1000 for r in data_new ["results" ]]
def get_stats(data):
    """Collect problem sizes, central timings and error bounds from a results file.

    Each entry of ``data["results"]`` contributes one point. Timings are
    stored in seconds (``*_s`` keys) and are returned in milliseconds.

    Entries carrying ``median_s`` use the median with the ``q25_s`` /
    ``q75_s`` quartiles as bounds. Entries without it are treated as the
    older format and use ``mean_s`` with ``mean - std`` / ``mean + std``
    as bounds.

    The format is decided per entry rather than from the first entry only,
    so an empty result list yields four empty lists instead of raising
    ``IndexError``, and a file mixing both formats is handled.

    Parameters
    ----------
    data : mapping with a ``"results"`` sequence of per-benchmark dicts.

    Returns
    -------
    tuple of four lists ``(nv, med, lo, hi)``: variable counts, central
    values, lower bounds and upper bounds (the last three in ms).
    """
    nv, med, lo, hi = [], [], [], []
    for record in data["results"]:
        nv.append(record["nvars"])
        if "median_s" in record:
            center = record["median_s"] * 1000
            lower = record["q25_s"] * 1000
            upper = record["q75_s"] * 1000
        else:
            # Older result files only carry mean/std; use a symmetric band.
            # NOTE: the lower bound can be <= 0 when std exceeds the mean.
            center = record["mean_s"] * 1000
            spread = record["std_s"] * 1000
            lower = center - spread
            upper = center + spread
        med.append(center)
        lo.append(lower)
        hi.append(upper)
    return nv, med, lo, hi
195+
196+ nv_old , med_old , lo_old , hi_old = get_stats (data_old )
197+ nv_new , med_new , lo_new , hi_new = get_stats (data_new )
184198
185199 color_old , color_new = "#1f77b4" , "#ff7f0e"
186200
187201 fig , axes = plt .subplots (2 , 2 , figsize = (14 , 10 ))
188202 fig .suptitle (f"LP Write Performance: { label_old } vs { label_new } " , fontsize = 14 )
189203
def plot_errorbar(ax, nv, med, lo, hi, **kwargs):
    """Draw ``med`` against ``nv`` on ``ax`` with asymmetric error bars.

    ``lo`` and ``hi`` are absolute lower/upper bounds; they are converted
    to the distances below and above each central value that ``errorbar``
    expects for ``yerr``.

    Remaining keyword arguments are forwarded to ``ax.errorbar``.
    ``capsize`` defaults to 3 but may be overridden by the caller; passing
    it no longer collides with the built-in default.
    """
    below = [center - bound for center, bound in zip(med, lo)]
    above = [bound - center for center, bound in zip(med, hi)]
    # Apply the default only when the caller has not chosen a cap size.
    kwargs.setdefault("capsize", 3)
    ax.errorbar(nv, med, yerr=[below, above], **kwargs)
208+
190209 # Panel 1: All data, log-log
191210 ax = axes [0 , 0 ]
192- ax .errorbar (
211+ plot_errorbar (
212+ ax ,
193213 nv_old ,
194- ms_old ,
195- yerr = std_old ,
214+ med_old ,
215+ lo_old ,
216+ hi_old ,
196217 marker = "o" ,
197218 color = color_old ,
198219 linestyle = "--" ,
199220 label = label_old ,
200221 alpha = 0.8 ,
201- capsize = 3 ,
202222 )
203- ax .errorbar (
223+ plot_errorbar (
224+ ax ,
204225 nv_new ,
205- ms_new ,
206- yerr = std_new ,
226+ med_new ,
227+ lo_new ,
228+ hi_new ,
207229 marker = "s" ,
208230 color = color_new ,
209231 linestyle = "-" ,
210232 label = label_new ,
211233 alpha = 0.8 ,
212- capsize = 3 ,
213234 )
214235 ax .set_xscale ("log" )
215236 ax .set_yscale ("log" )
216237 ax .set_xlabel ("Number of variables" )
217- ax .set_ylabel ("Write time (ms)" )
238+ ax .set_ylabel ("Write time (ms, median )" )
218239 ax .set_title ("IO time vs problem size (log-log)" )
219240 ax .legend ()
220241 ax .grid (True , alpha = 0.3 )
221242
222- # Panel 2: Speedup ratio (old/new)
243+ # Panel 2: Speedup ratio (old/new) with IQR-based bounds
223244 ax = axes [0 , 1 ]
224245 if len (nv_old ) == len (nv_new ):
225- speedup = [o / n for o , n in zip (ms_old , ms_new )]
226- ax .plot (nv_old , speedup , marker = "o" , color = "#2ca02c" )
227- ax .fill_between (nv_old , 1.0 , speedup , alpha = 0.15 , color = "#2ca02c" )
246+ speedup = [o / n for o , n in zip (med_old , med_new )]
247+ # Conservative bounds: best case = hi_old/lo_new, worst = lo_old/hi_new
248+ speedup_lo = [l / h for l , h in zip (lo_old , hi_new )]
249+ speedup_hi = [h / l for h , l in zip (hi_old , lo_new )]
250+ yerr_lo = [s - sl for s , sl in zip (speedup , speedup_lo )]
251+ yerr_hi = [sh - s for s , sh in zip (speedup , speedup_hi )]
252+ ax .errorbar (
253+ nv_old ,
254+ speedup ,
255+ yerr = [yerr_lo , yerr_hi ],
256+ marker = "o" ,
257+ color = "#2ca02c" ,
258+ capsize = 3 ,
259+ )
260+ ax .fill_between (nv_old , speedup_lo , speedup_hi , alpha = 0.15 , color = "#2ca02c" )
228261 ax .axhline (1.0 , color = "gray" , linestyle = "--" , alpha = 0.5 )
229262 ax .set_xscale ("log" )
230263 ax .set_xlabel ("Number of variables" )
@@ -237,30 +270,32 @@ def plot_comparison(file_old: str, file_new: str) -> None:
237270 cutoff = 25000
238271 idx_old = [i for i , n in enumerate (nv_old ) if n <= cutoff ]
239272 idx_new = [i for i , n in enumerate (nv_new ) if n <= cutoff ]
240- ax .errorbar (
273+ plot_errorbar (
274+ ax ,
241275 [nv_old [i ] for i in idx_old ],
242- [ms_old [i ] for i in idx_old ],
243- yerr = [std_old [i ] for i in idx_old ],
276+ [med_old [i ] for i in idx_old ],
277+ [lo_old [i ] for i in idx_old ],
278+ [hi_old [i ] for i in idx_old ],
244279 marker = "o" ,
245280 color = color_old ,
246281 linestyle = "--" ,
247282 label = label_old ,
248283 alpha = 0.8 ,
249- capsize = 3 ,
250284 )
251- ax .errorbar (
285+ plot_errorbar (
286+ ax ,
252287 [nv_new [i ] for i in idx_new ],
253- [ms_new [i ] for i in idx_new ],
254- yerr = [std_new [i ] for i in idx_new ],
288+ [med_new [i ] for i in idx_new ],
289+ [lo_new [i ] for i in idx_new ],
290+ [hi_new [i ] for i in idx_new ],
255291 marker = "s" ,
256292 color = color_new ,
257293 linestyle = "-" ,
258294 label = label_new ,
259295 alpha = 0.8 ,
260- capsize = 3 ,
261296 )
262297 ax .set_xlabel ("Number of variables" )
263- ax .set_ylabel ("Write time (ms)" )
298+ ax .set_ylabel ("Write time (ms, median )" )
264299 ax .set_ylim (bottom = 0 )
265300 ax .set_title (f"Small models (≤ { cutoff :,} vars)" )
266301 ax .legend ()
@@ -270,31 +305,33 @@ def plot_comparison(file_old: str, file_new: str) -> None:
270305 ax = axes [1 , 1 ]
271306 idx_old = [i for i , n in enumerate (nv_old ) if n > cutoff ]
272307 idx_new = [i for i , n in enumerate (nv_new ) if n > cutoff ]
273- ax .errorbar (
308+ plot_errorbar (
309+ ax ,
274310 [nv_old [i ] for i in idx_old ],
275- [ms_old [i ] for i in idx_old ],
276- yerr = [std_old [i ] for i in idx_old ],
311+ [med_old [i ] for i in idx_old ],
312+ [lo_old [i ] for i in idx_old ],
313+ [hi_old [i ] for i in idx_old ],
277314 marker = "o" ,
278315 color = color_old ,
279316 linestyle = "--" ,
280317 label = label_old ,
281318 alpha = 0.8 ,
282- capsize = 3 ,
283319 )
284- ax .errorbar (
320+ plot_errorbar (
321+ ax ,
285322 [nv_new [i ] for i in idx_new ],
286- [ms_new [i ] for i in idx_new ],
287- yerr = [std_new [i ] for i in idx_new ],
323+ [med_new [i ] for i in idx_new ],
324+ [lo_new [i ] for i in idx_new ],
325+ [hi_new [i ] for i in idx_new ],
288326 marker = "s" ,
289327 color = color_new ,
290328 linestyle = "-" ,
291329 label = label_new ,
292330 alpha = 0.8 ,
293- capsize = 3 ,
294331 )
295332 ax .set_xscale ("log" )
296333 ax .set_xlabel ("Number of variables" )
297- ax .set_ylabel ("Write time (ms)" )
334+ ax .set_ylabel ("Write time (ms, median )" )
298335 ax .set_title (f"Large models (> { cutoff :,} vars)" )
299336 ax .legend ()
300337 ax .grid (True , alpha = 0.3 )
0 commit comments