|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +""" |
| 3 | +TDD Test Suite: D3 — Analise Estatistica e Inferencia (N2 + N3) |
| 4 | +CORA-Eval Benchmark Tasks: D3-N2-01 a D3-N2-05, D3-N3-01 a D3-N3-05 |
| 5 | +Todas as verificacoes usam dados sinteticos com ground truth conhecido. |
| 6 | +""" |
| 7 | + |
| 8 | +import sys, math, random |
| 9 | +from typing import List, Tuple |
| 10 | + |
| 11 | +random.seed(42) |
| 12 | + |
| 13 | +# ══════════════════════════════════════════════════════════════════════ |
| 14 | +# FUNCOES ESTATISTICAS IMPLEMENTADAS |
| 15 | +# ══════════════════════════════════════════════════════════════════════ |
| 16 | + |
| 17 | +def mean(x: List[float]) -> float: |
| 18 | + return sum(x) / len(x) |
| 19 | + |
| 20 | +def variance(x: List[float], ddof: int = 1) -> float: |
| 21 | + m = mean(x) |
| 22 | + return sum((xi - m) ** 2 for xi in x) / (len(x) - ddof) |
| 23 | + |
| 24 | +def std(x: List[float], ddof: int = 1) -> float: |
| 25 | + return math.sqrt(variance(x, ddof)) |
| 26 | + |
| 27 | +def t_statistic(x: List[float], y: List[float]) -> float: |
| 28 | + """Estatistica t de Welch (variancias desiguais).""" |
| 29 | + m1, m2 = mean(x), mean(y) |
| 30 | + v1, v2 = variance(x), variance(y) |
| 31 | + n1, n2 = len(x), len(y) |
| 32 | + se = math.sqrt(v1/n1 + v2/n2) |
| 33 | + return (m1 - m2) / se if se > 0 else 0.0 |
| 34 | + |
| 35 | +def welch_df(x: List[float], y: List[float]) -> float: |
| 36 | + """Graus de liberdade de Welch-Satterthwaite.""" |
| 37 | + v1, v2 = variance(x), variance(y) |
| 38 | + n1, n2 = len(x), len(y) |
| 39 | + num = (v1/n1 + v2/n2) ** 2 |
| 40 | + den = (v1/n1)**2/(n1-1) + (v2/n2)**2/(n2-1) |
| 41 | + return num / den if den > 0 else 1.0 |
| 42 | + |
| 43 | +def cohens_d(x: List[float], y: List[float]) -> float: |
| 44 | + """Tamanho de efeito de Cohen.""" |
| 45 | + pooled_sd = math.sqrt((variance(x) + variance(y)) / 2) |
| 46 | + return (mean(x) - mean(y)) / pooled_sd if pooled_sd > 0 else 0.0 |
| 47 | + |
| 48 | +def pearson_r(x: List[float], y: List[float]) -> float: |
| 49 | + """Coeficiente de correlacao de Pearson.""" |
| 50 | + mx, my = mean(x), mean(y) |
| 51 | + num = sum((xi-mx)*(yi-my) for xi, yi in zip(x, y)) |
| 52 | + den = math.sqrt(sum((xi-mx)**2 for xi in x) * sum((yi-my)**2 for yi in y)) |
| 53 | + return num / den if den > 0 else 0.0 |
| 54 | + |
| 55 | +def r_squared(y_true: List[float], y_pred: List[float]) -> float: |
| 56 | + """Coeficiente de determinacao R^2.""" |
| 57 | + ss_res = sum((yt - yp)**2 for yt, yp in zip(y_true, y_pred)) |
| 58 | + ss_tot = sum((yt - mean(y_true))**2 for yt in y_true) |
| 59 | + return 1 - ss_res/ss_tot if ss_tot > 0 else 0.0 |
| 60 | + |
| 61 | +def linear_regression(x: List[float], y: List[float]) -> Tuple[float, float]: |
| 62 | + """Regressao linear simples: y = b0 + b1*x. Retorna (b0, b1).""" |
| 63 | + mx, my = mean(x), mean(y) |
| 64 | + num = sum((xi-mx)*(yi-my) for xi, yi in zip(x, y)) |
| 65 | + den = sum((xi-mx)**2 for xi in x) |
| 66 | + b1 = num / den if den > 0 else 0.0 |
| 67 | + b0 = my - b1 * mx |
| 68 | + return (b0, b1) |
| 69 | + |
| 70 | +def bonferroni_correction(p_values: List[float]) -> List[float]: |
| 71 | + """Correcao de Bonferroni: p_adj = min(1, p * n).""" |
| 72 | + n = len(p_values) |
| 73 | + return [min(1.0, p * n) for p in p_values] |
| 74 | + |
| 75 | +def benjamini_hochberg(p_values: List[float], alpha: float = 0.05) -> List[bool]: |
| 76 | + """FDR control via Benjamini-Hochberg.""" |
| 77 | + n = len(p_values) |
| 78 | + indexed = sorted(enumerate(p_values), key=lambda x: x[1]) |
| 79 | + rejected = [False] * n |
| 80 | + for rank, (idx, p) in enumerate(indexed): |
| 81 | + threshold = (rank + 1) / n * alpha |
| 82 | + if p <= threshold: |
| 83 | + rejected[idx] = True |
| 84 | + else: |
| 85 | + break |
| 86 | + return rejected |
| 87 | + |
| 88 | + |
| 89 | +# ══════════════════════════════════════════════════════════════════════ |
| 90 | +# N2 TESTS (Graduacao) |
| 91 | +# ══════════════════════════════════════════════════════════════════════ |
| 92 | + |
| 93 | +def test_t_test_equal_means(): |
| 94 | + """D3-N2-01: Teste t: duas amostras da mesma populacao -> p alto.""" |
| 95 | + # Amostras da mesma N(10, 2) |
| 96 | + x = [10 + random.gauss(0, 2) for _ in range(50)] |
| 97 | + y = [10 + random.gauss(0, 2) for _ in range(50)] |
| 98 | + t = t_statistic(x, y) |
| 99 | + # |t| < 2 para amostras iguais (p > 0.05 tipicamente) |
| 100 | + assert abs(t) < 2.5, f"t={t:.3f} deveria ser pequeno para amostras iguais" |
| 101 | + print(f" [D3-N2-01] Teste t amostras iguais: t={t:.3f}... PASS") |
| 102 | + return True |
| 103 | + |
| 104 | +def test_t_test_different_means(): |
| 105 | + """D3-N2-01: Teste t: amostras diferentes -> |t| grande, d grande.""" |
| 106 | + x = [10 + random.gauss(0, 1) for _ in range(50)] |
| 107 | + y = [13 + random.gauss(0, 1) for _ in range(50)] # diferenca de 3 SD |
| 108 | + t = t_statistic(x, y) |
| 109 | + d = cohens_d(x, y) |
| 110 | + assert abs(t) > 5, f"t={t:.3f} deveria ser grande" |
| 111 | + assert abs(d) > 2.0, f"Cohen d={d:.3f} deveria ser grande" |
| 112 | + print(f" [D3-N2-01] Teste t diferentes: t={t:.1f}, d={d:.2f}... PASS") |
| 113 | + return True |
| 114 | + |
| 115 | +def test_anova_oneway(): |
| 116 | + """D3-N2-02: ANOVA one-way: 3 grupos com medias diferentes -> F grande.""" |
| 117 | + random.seed(123) |
| 118 | + g1 = [10 + random.gauss(0, 1) for _ in range(30)] |
| 119 | + g2 = [12 + random.gauss(0, 1) for _ in range(30)] |
| 120 | + g3 = [15 + random.gauss(0, 1) for _ in range(30)] |
| 121 | + |
| 122 | + # F = MS_between / MS_within |
| 123 | + all_groups = [g1, g2, g3] |
| 124 | + grand_mean = mean([x for g in all_groups for x in g]) |
| 125 | + ss_between = sum(len(g) * (mean(g) - grand_mean)**2 for g in all_groups) |
| 126 | + ss_within = sum(sum((x - mean(g))**2 for x in g) for g in all_groups) |
| 127 | + df_between = len(all_groups) - 1 |
| 128 | + df_within = sum(len(g) for g in all_groups) - len(all_groups) |
| 129 | + ms_between = ss_between / df_between |
| 130 | + ms_within = ss_within / df_within |
| 131 | + F = ms_between / ms_within |
| 132 | + |
| 133 | + assert F > 10, f"F={F:.1f} deveria ser grande (grupos diferentes)" |
| 134 | + print(f" [D3-N2-02] ANOVA one-way: F={F:.1f}, ss_between={ss_between:.0f}... PASS") |
| 135 | + return True |
| 136 | + |
| 137 | +def test_linear_regression_perfect(): |
| 138 | + """D3-N2-03: Regressao linear: y = 2 + 3x + ruido -> recupera parametros.""" |
| 139 | + random.seed(456) |
| 140 | + x = [i for i in range(50)] |
| 141 | + y = [2 + 3*xi + random.gauss(0, 2) for xi in x] |
| 142 | + b0, b1 = linear_regression(x, y) |
| 143 | + assert abs(b0 - 2) < 0.8, f"b0={b0:.2f}, esperado ~2" |
| 144 | + assert abs(b1 - 3) < 0.05, f"b1={b1:.2f}, esperado ~3" |
| 145 | + y_pred = [b0 + b1*xi for xi in x] |
| 146 | + r2 = r_squared(y, y_pred) |
| 147 | + assert r2 > 0.95, f"R^2={r2:.3f} deveria ser >0.95" |
| 148 | + print(f" [D3-N2-03] Regressao: y={b0:.2f}+{b1:.3f}x, R^2={r2:.3f}... PASS") |
| 149 | + return True |
| 150 | + |
| 151 | +def test_pearson_correlation(): |
| 152 | + """D3-N2-04: Correlacao de Pearson: r ~ 1 para y = 2x, r ~ 0 para independentes.""" |
| 153 | + random.seed(789) |
| 154 | + x = [random.gauss(0, 1) for _ in range(200)] |
| 155 | + # Perfeitamente correlacionado |
| 156 | + y_corr = [2*xi + random.gauss(0, 0.1) for xi in x] |
| 157 | + r_corr = pearson_r(x, y_corr) |
| 158 | + assert r_corr > 0.95, f"r={r_corr:.3f} deveria ser ~1" |
| 159 | + # Independente |
| 160 | + y_ind = [random.gauss(0, 1) for _ in range(200)] |
| 161 | + r_ind = pearson_r(x, y_ind) |
| 162 | + assert abs(r_ind) < 0.3, f"r={r_ind:.3f} deveria ser ~0" |
| 163 | + print(f" [D3-N2-04] Pearson: r_corr={r_corr:.3f}, r_ind={r_ind:.3f}... PASS") |
| 164 | + return True |
| 165 | + |
| 166 | +def test_multiple_comparison(): |
| 167 | + """D3-N2-05 / D3-N3-04: Correcao de multiplas comparacoes.""" |
| 168 | + # 20 testes, 2 verdadeiros positivos (p=0.001), 18 nulos (p~U(0,1)) |
| 169 | + random.seed(101) |
| 170 | + p_values = [0.001, 0.001] + [random.random() for _ in range(18)] |
| 171 | + random.shuffle(p_values) |
| 172 | + |
| 173 | + # Bonferroni: nenhum falso positivo |
| 174 | + bonf_adj = bonferroni_correction(p_values) |
| 175 | + n_rejected_bonf = sum(1 for p in bonf_adj if p < 0.05) |
| 176 | + assert n_rejected_bonf <= 3, f"Bonferroni rejeitou {n_rejected_bonf} (muitos falsos positivos?)" |
| 177 | + |
| 178 | + # Benjamini-Hochberg: controla FDR |
| 179 | + bh_rej = benjamini_hochberg(p_values, alpha=0.05) |
| 180 | + n_rejected_bh = sum(bh_rej) |
| 181 | + # BH deve rejeitar os 2 verdadeiros + talvez alguns falsos (controlado) |
| 182 | + assert n_rejected_bh >= 2, f"BH rejeitou apenas {n_rejected_bh}" |
| 183 | + print(f" [D3-N3-04] Multiplas comparacoes: Bonf={n_rejected_bonf}, BH={n_rejected_bh}... PASS") |
| 184 | + return True |
| 185 | + |
| 186 | + |
| 187 | +# ══════════════════════════════════════════════════════════════════════ |
| 188 | +# N3 TESTS (Pos-Graduacao) |
| 189 | +# ══════════════════════════════════════════════════════════════════════ |
| 190 | + |
| 191 | +def test_mcmc_metropolis_hastings(): |
| 192 | + """D3-N3-01: MCMC Metropolis-Hastings para amostrar N(0,1).""" |
| 193 | + random.seed(2021) |
| 194 | + def target(x): return math.exp(-0.5 * x * x) # N(0,1) nao-normalizada |
| 195 | + |
| 196 | + n_iter = 5000 |
| 197 | + burn_in = 1000 |
| 198 | + samples = [] |
| 199 | + current = 0.0 |
| 200 | + |
| 201 | + for i in range(n_iter + burn_in): |
| 202 | + proposal = current + random.gauss(0, 1.0) |
| 203 | + alpha = target(proposal) / (target(current) + 1e-10) |
| 204 | + if random.random() < min(1.0, alpha): |
| 205 | + current = proposal |
| 206 | + if i >= burn_in: |
| 207 | + samples.append(current) |
| 208 | + |
| 209 | + # Verifica convergencia: media ~ 0, std ~ 1 |
| 210 | + sample_mean = mean(samples) |
| 211 | + sample_std = std(samples) |
| 212 | + assert abs(sample_mean) < 0.1, f"Media MCMC={sample_mean:.3f}, esperado ~0" |
| 213 | + assert abs(sample_std - 1.0) < 0.15, f"Std MCMC={sample_std:.3f}, esperado ~1" |
| 214 | + # Taxa de aceitacao tipica: 20-50% |
| 215 | + accept_rate = len(set(samples)) / len(samples) # aproximacao |
| 216 | + print(f" [D3-N3-01] MCMC: mean={sample_mean:.3f}, std={sample_std:.3f}... PASS") |
| 217 | + return True |
| 218 | + |
| 219 | +def test_pca_variance_explained(): |
| 220 | + """D3-N3-02: PCA — variancia explicada por componente. |
| 221 | + Dados: 2D com correlacao 0.9 na direcao (1,1).""" |
| 222 | + random.seed(303) |
| 223 | + n = 200 |
| 224 | + # Gera dados correlacionados |
| 225 | + z1 = [random.gauss(0, 3) for _ in range(n)] # PC1: alta variancia |
| 226 | + z2 = [random.gauss(0, 0.3) for _ in range(n)] # PC2: baixa variancia |
| 227 | + # Rotaciona 45 graus |
| 228 | + x = [zi * 0.707 - zj * 0.707 for zi, zj in zip(z1, z2)] |
| 229 | + y = [zi * 0.707 + zj * 0.707 for zi, zj in zip(z1, z2)] |
| 230 | + |
| 231 | + # Covariancia manual |
| 232 | + mx, my = mean(x), mean(y) |
| 233 | + cov_xx = sum((xi-mx)**2 for xi in x) / (n-1) |
| 234 | + cov_yy = sum((yi-my)**2 for yi in y) / (n-1) |
| 235 | + cov_xy = sum((xi-mx)*(yi-my) for xi, yi in zip(x, y)) / (n-1) |
| 236 | + |
| 237 | + # Autovalores da matriz de covariancia 2x2 |
| 238 | + trace = cov_xx + cov_yy |
| 239 | + det = cov_xx * cov_yy - cov_xy * cov_xy |
| 240 | + disc = math.sqrt(trace*trace - 4*det) |
| 241 | + lambda1 = (trace + disc) / 2 |
| 242 | + lambda2 = (trace - disc) / 2 |
| 243 | + |
| 244 | + var_explained_pc1 = lambda1 / (lambda1 + lambda2) |
| 245 | + # PC1 deve explicar a maioria da variancia (>70%) |
| 246 | + assert var_explained_pc1 > 0.70, f"PC1 explica {var_explained_pc1:.1%}" |
| 247 | + print(f" [D3-N3-02] PCA: PC1={lambda1:.2f}, PC2={lambda2:.2f}, VarPC1={var_explained_pc1:.1%}... PASS") |
| 248 | + return True |
| 249 | + |
| 250 | +def test_bootstrap_ci(): |
| 251 | + """D3-N2-04 (ext): Bootstrap IC 95% para media. |
| 252 | + Populacao N(10, 2). Amostra n=30. IC deve conter 10.""" |
| 253 | + random.seed(505) |
| 254 | + population_mean = 10.0 |
| 255 | + sample = [random.gauss(population_mean, 2) for _ in range(30)] |
| 256 | + |
| 257 | + # Bootstrap: 1000 reamostragens |
| 258 | + n_boot = 1000 |
| 259 | + boot_means = [] |
| 260 | + for _ in range(n_boot): |
| 261 | + boot_sample = random.choices(sample, k=len(sample)) |
| 262 | + boot_means.append(mean(boot_sample)) |
| 263 | + |
| 264 | + boot_means.sort() |
| 265 | + ci_lower = boot_means[25] # 2.5% |
| 266 | + ci_upper = boot_means[974] # 97.5% |
| 267 | + |
| 268 | + assert ci_lower <= population_mean <= ci_upper, \ |
| 269 | + f"IC=[{ci_lower:.2f}, {ci_upper:.2f}] nao contem media={population_mean}" |
| 270 | + print(f" [D3-N2-04] Bootstrap IC 95%: [{ci_lower:.2f}, {ci_upper:.2f}] contem {population_mean}... PASS") |
| 271 | + return True |
| 272 | + |
| 273 | + |
| 274 | +# ══════════════════════════════════════════════════════════════════════ |
| 275 | +# RUNNER |
| 276 | +# ══════════════════════════════════════════════════════════════════════ |
| 277 | + |
| 278 | +def main(): |
| 279 | + tests = [ |
| 280 | + # N2 |
| 281 | + ("t-test equal", test_t_test_equal_means), |
| 282 | + ("t-test different", test_t_test_different_means), |
| 283 | + ("ANOVA one-way", test_anova_oneway), |
| 284 | + ("Linear regression", test_linear_regression_perfect), |
| 285 | + ("Pearson correlation", test_pearson_correlation), |
| 286 | + ("Bootstrap CI", test_bootstrap_ci), |
| 287 | + # N3 |
| 288 | + ("MCMC Metropolis-Hastings", test_mcmc_metropolis_hastings), |
| 289 | + ("PCA variance", test_pca_variance_explained), |
| 290 | + ("Multiple comparison", test_multiple_comparison), |
| 291 | + ] |
| 292 | + |
| 293 | + print("=" * 60) |
| 294 | + print(" TDD TEST SUITE: D3 — Estatistica (N2+N3)") |
| 295 | + print("=" * 60) |
| 296 | + |
| 297 | + passed = 0 |
| 298 | + failed = 0 |
| 299 | + for name, test_fn in tests: |
| 300 | + try: |
| 301 | + test_fn() |
| 302 | + passed += 1 |
| 303 | + except AssertionError as e: |
| 304 | + print(f" [{name}] FAIL: {e}") |
| 305 | + failed += 1 |
| 306 | + |
| 307 | + print(f"\n RESULT: {passed}/{passed+failed} passed, {failed} failed") |
| 308 | + print("=" * 60) |
| 309 | + return failed == 0 |
| 310 | + |
| 311 | +if __name__ == "__main__": |
| 312 | + sys.exit(0 if main() else 1) |
0 commit comments