@@ -58,6 +58,8 @@ def measure_execution_time(func, input_size, iterations=5):
5858
5959 # 3. Execution with determined input
6060 try :
61+ # Warm up once to reduce cold-start noise
62+ func (input_data )
6163 start_time = time .perf_counter ()
6264 for _ in range (iterations ):
6365 func (input_data )
@@ -73,6 +75,8 @@ def measure_execution_time(func, input_size, iterations=5):
7375def _measure_heuristic (func , input_size , iterations ):
7476 """Fallback: Try int first, then list."""
7577 try :
78+ # Warm up once to reduce cold-start noise
79+ func (input_size )
7680 # Try passing integer N
7781 start_time = time .perf_counter ()
7882 for _ in range (iterations ):
@@ -82,6 +86,8 @@ def _measure_heuristic(func, input_size, iterations):
8286 except TypeError :
8387 # Try passing list of size N
8488 data = list (range (input_size ))
89+ # Warm up once to reduce cold-start noise
90+ func (data )
8591 start_time = time .perf_counter ()
8692 for _ in range (iterations ):
8793 func (data )
@@ -127,6 +133,82 @@ def _compute_residuals(normalized_times, theoretical):
127133 return [t - (a * x + b ) for t , x in zip (normalized_times , theoretical )]
128134
129135
136+ def _tie_break_linear_vs_nlogn (n_values , times , scores ):
137+ linear_rmse = scores .get ("O(n) (Linear)" )
138+ nlogn_rmse = scores .get ("O(n log n) (Linearithmic)" )
139+ if linear_rmse is None or nlogn_rmse is None :
140+ return None , None
141+
142+ relative_eps = 0.05
143+ threshold = relative_eps * min (linear_rmse , nlogn_rmse )
144+ if abs (linear_rmse - nlogn_rmse ) > threshold :
145+ return None , None
146+
147+ # Filter pairs together to maintain alignment (use n > 1 to avoid log(1)=0)
148+ pairs = [(n , t ) for n , t in zip (n_values , times ) if n > 1 and t > 0 ]
149+ if len (pairs ) < 2 :
150+ return None , None
151+
152+ log_n = [math .log (n ) for n , _ in pairs ]
153+ log_t = [math .log (t ) for _ , t in pairs ]
154+
155+ mean_ln = statistics .fmean (log_n )
156+ mean_lt = statistics .fmean (log_t )
157+ var_ln = sum ((x - mean_ln ) ** 2 for x in log_n )
158+ if var_ln == 0 :
159+ return None , None
160+
161+ cov = sum ((x - mean_ln ) * (y - mean_lt ) for x , y in zip (log_n , log_t ))
162+ slope = cov / var_ln
163+ n_mid = math .exp (mean_ln )
164+ ln_mid = math .log (n_mid )
165+
166+ # Guard against division by zero when n_mid is near 1
167+ if abs (ln_mid ) < 1e-6 :
168+ return None , None
169+
170+ target_nlogn = 1.0 + (1.0 / ln_mid )
171+
172+ if abs (slope - 1.0 ) <= abs (slope - target_nlogn ):
173+ return "O(n) (Linear)" , linear_rmse
174+ return "O(n log n) (Linearithmic)" , nlogn_rmse
175+
176+
def _score_models(normalized_times, models, model_priority):
    """Score every candidate model and pick the best-fitting one.

    Each model is scored by the RMSE of the residuals returned by
    ``_compute_residuals``. Scores within 5% of the current best count as
    a tie, and ties go to the model with the lower priority number.

    Args:
        normalized_times: Measured times passed through to
            ``_compute_residuals``.
        models: Iterable of ``(name, theoretical)`` pairs.
        model_priority: Mapping of model name to an integer rank used to
            resolve ties; a lower number is preferred.

    Returns:
        ``(best_fit, best_score, scores)``: the chosen model name (``None``
        if no model could be scored), its RMSE (``inf`` if none), and a
        dict of every RMSE that was computed, keyed by model name.
    """
    # 5% relative epsilon absorbs timing noise between comparable fits.
    relative_eps = 0.05

    best_fit = None
    best_score = float("inf")
    scores = {}

    for name, theoretical in models:
        # Keep the try body limited to the calls that can raise
        # StatisticsError (e.g. fmean over an empty residual list).
        try:
            residuals = _compute_residuals(normalized_times, theoretical)
            if residuals is None:
                continue
            rmse = math.sqrt(statistics.fmean(r * r for r in residuals))
        except statistics.StatisticsError:
            continue

        scores[name] = rmse

        # A NaN or infinite score is recorded but can never be selected.
        if not math.isfinite(rmse):
            continue

        # Accept the first usable candidate outright rather than relying on
        # inf/NaN arithmetic and a sentinel priority to let it through.
        if best_fit is None:
            best_fit = name
            best_score = rmse
            continue

        threshold = relative_eps * best_score if best_score > 0 else 1e-9

        if rmse < best_score - threshold:
            # Clearly better fit.
            best_fit = name
            best_score = rmse
        elif (
            abs(rmse - best_score) <= threshold
            and model_priority[name] < model_priority[best_fit]
        ):
            # Effectively tied: prefer the simpler (lower-priority-number) model.
            best_fit = name
            best_score = rmse

    return best_fit, best_score, scores
210+
211+
130212def detect_complexity (n_values , times ):
131213 """
132214 Estimate complexity by fitting theoretical curves to measured times.
@@ -164,33 +246,17 @@ def detect_complexity(n_values, times):
164246 "O(n^2) (Quadratic)" : 4 ,
165247 }
166248
167- best_fit = None
168- best_score = float ("inf" )
169-
170- for name , theoretical in models :
171- try :
172- residuals = _compute_residuals (normalized_times , theoretical )
173- if residuals is None :
174- continue
175-
176- rmse = math .sqrt (statistics .fmean (r * r for r in residuals ))
177-
178- # Use 5% relative epsilon for tie-breaking to handle timing noise
179- # and prefer simpler models when fits are comparable
180- relative_eps = 0.05
181- threshold = relative_eps * best_score if best_score > 0 else 1e-9
182-
183- if rmse < best_score - threshold :
184- best_score = rmse
185- best_fit = name
186- elif abs (rmse - best_score ) <= threshold :
187- # Scores are effectively tied; prefer simpler model
188- current_priority = model_priority [best_fit ] if best_fit else 999
189- if model_priority [name ] < current_priority :
190- best_fit = name
191- best_score = rmse
192- except statistics .StatisticsError :
193- continue
249+ best_fit , best_score , scores = _score_models (
250+ normalized_times ,
251+ models ,
252+ model_priority ,
253+ )
254+
255+ if best_fit in ("O(n) (Linear)" , "O(n log n) (Linearithmic)" ):
256+ tie_fit , tie_score = _tie_break_linear_vs_nlogn (n_values , times , scores )
257+ if tie_fit is not None :
258+ best_fit = tie_fit
259+ best_score = tie_score
194260
195261 return best_fit , best_score
196262
0 commit comments