|
| 1 | +import numpy as np |
| 2 | +from matplotlib import pyplot as plt |
| 3 | +import pandas as pd |
| 4 | +from datamatrix import DataMatrix |
| 5 | +import pingouin as pg |
| 6 | + |
# Switch matplotlib to the Agg backend so this module can run headless
# (no display available), e.g. on a CI machine.
plt.switch_backend('Agg')
| 9 | + |
| 10 | +# ------------------------------------------------------------------ |
| 11 | +# Fixtures / helpers |
| 12 | +# ------------------------------------------------------------------ |
| 13 | + |
# Container classes that every test below builds its sample data with.
TEST_CLASSES = (pd.DataFrame, DataMatrix)
| 15 | + |
| 16 | +def _make_sample(cls, design='between', n_subjects=32, seed=42): |
| 17 | + """Return a sample dataset for a given class.""" |
| 18 | + np.random.seed(seed) |
| 19 | + |
| 20 | + if design == 'between': |
| 21 | + # Between-subjects design |
| 22 | + data = { |
| 23 | + 'subject': np.arange(n_subjects), |
| 24 | + 'group': np.repeat(['A', 'B'], n_subjects // 2), |
| 25 | + 'score': np.concatenate([ |
| 26 | + np.random.normal(100, 15, n_subjects // 2), |
| 27 | + np.random.normal(110, 15, n_subjects // 2) |
| 28 | + ]), |
| 29 | + 'age': np.random.normal(25, 5, n_subjects), |
| 30 | + 'treatment': np.tile(['placebo', 'drug'], n_subjects // 2) |
| 31 | + } |
| 32 | + elif design == 'within': |
| 33 | + # Within-subjects design |
| 34 | + n_obs = n_subjects // 3 |
| 35 | + data = { |
| 36 | + 'subject': np.repeat(np.arange(n_obs), 3), |
| 37 | + 'time': np.tile(['T1', 'T2', 'T3'], n_obs), |
| 38 | + 'score': np.concatenate([ |
| 39 | + np.random.normal(100, 10, n_obs), |
| 40 | + np.random.normal(105, 10, n_obs), |
| 41 | + np.random.normal(110, 10, n_obs) |
| 42 | + ]) + np.random.normal(0, 5, n_obs * 3) |
| 43 | + } |
| 44 | + elif design == 'mixed': |
| 45 | + # Mixed design |
| 46 | + n_obs = n_subjects // 2 |
| 47 | + data = { |
| 48 | + 'subject': np.tile(np.arange(n_obs), 2), |
| 49 | + 'time': np.repeat(['pre', 'post'], n_obs), |
| 50 | + 'group': np.concatenate([np.repeat(['control', 'treatment'], n_obs // 2)] * 2), |
| 51 | + 'score': np.concatenate([ |
| 52 | + np.random.normal(100, 10, n_obs), |
| 53 | + np.random.normal(110, 10, n_obs) |
| 54 | + ]) |
| 55 | + } |
| 56 | + elif design == 'correlation': |
| 57 | + # For correlation analyses |
| 58 | + data = { |
| 59 | + 'x': np.random.normal(100, 15, n_subjects), |
| 60 | + 'y': np.random.normal(100, 15, n_subjects), |
| 61 | + 'z': np.random.normal(100, 15, n_subjects) |
| 62 | + } |
| 63 | + # Add correlation |
| 64 | + data['y'] = data['x'] * 0.7 + np.random.normal(0, 10, n_subjects) |
| 65 | + data['z'] = data['x'] * 0.3 + data['y'] * 0.4 + np.random.normal(0, 10, n_subjects) |
| 66 | + |
| 67 | + return cls(data) |
| 68 | + |
| 69 | +# ------------------------------------------------------------------ |
| 70 | +# T-test Tests |
| 71 | +# ------------------------------------------------------------------ |
| 72 | + |
def test_ttest_one_sample():
    """Run a one-sample t-test of 'score' against 100 for each container class."""
    for container in TEST_CLASSES:
        sample = _make_sample(container, design='between')
        stats = pg.ttest(sample['score'], 100)
        # The result must expose the statistic and the p-value.
        for key in ('T', 'p_val'):
            assert key in stats
| 80 | + |
def test_ttest_independent():
    """Run an unpaired t-test comparing the scores of groups A and B."""
    for container in TEST_CLASSES:
        sample = _make_sample(container, design='between')
        scores_a = sample[sample['group'] == 'A']['score']
        scores_b = sample[sample['group'] == 'B']['score']
        stats = pg.ttest(scores_a, scores_b, paired=False)
        for key in ('T', 'p_val'):
            assert key in stats
| 90 | + |
def test_ttest_paired():
    """Run a paired t-test between the T1 and T2 scores of the within design."""
    for container in TEST_CLASSES:
        sample = _make_sample(container, design='within')
        first = sample[sample['time'] == 'T1']['score']
        second = sample[sample['time'] == 'T2']['score']
        # A paired test needs equally long inputs, so trim to the shorter one.
        n_pairs = min(len(first), len(second))
        stats = pg.ttest(first[:n_pairs], second[:n_pairs], paired=True)
        for key in ('T', 'p_val'):
            assert key in stats
| 102 | + |
| 103 | +# ------------------------------------------------------------------ |
| 104 | +# ANOVA Tests |
| 105 | +# ------------------------------------------------------------------ |
| 106 | + |
def test_anova_oneway():
    """Run a one-way ANOVA of 'score' by 'group' for each container class."""
    for container in TEST_CLASSES:
        sample = _make_sample(container, design='between')
        table = pg.anova(data=sample, dv='score', between='group')
        for column in ('F', 'p_unc'):
            assert column in table.columns
| 114 | + |
def test_rm_anova():
    """Run a repeated-measures ANOVA of 'score' over 'time' within 'subject'."""
    for container in TEST_CLASSES:
        sample = _make_sample(container, design='within')
        table = pg.rm_anova(
            data=sample, dv='score', within='time', subject='subject'
        )
        for column in ('F', 'p_unc'):
            assert column in table.columns
| 122 | + |
def test_mixed_anova():
    """Run a mixed ANOVA with 'time' within subjects and 'group' between."""
    anova_kwargs = dict(
        dv='score', within='time', between='group', subject='subject'
    )
    for container in TEST_CLASSES:
        sample = _make_sample(container, design='mixed')
        table = pg.mixed_anova(data=sample, **anova_kwargs)
        for column in ('F', 'p_unc'):
            assert column in table.columns
| 131 | + |
| 132 | +# ------------------------------------------------------------------ |
| 133 | +# Correlation Tests |
| 134 | +# ------------------------------------------------------------------ |
| 135 | + |
def test_correlation():
    """Check that Pearson and Spearman r and p fall inside their valid ranges."""
    for container in TEST_CLASSES:
        sample = _make_sample(container, design='correlation')

        # Pearson: the first result row is unpacked positionally into six values.
        pearson_row = pg.corr(sample['x'], sample['y'], method='pearson').values[0]
        _n, coef, _ci, p_value, _bf, _power = pearson_row
        assert -1 <= coef <= 1
        assert 0 <= p_value <= 1

        # Spearman: the first result row is unpacked positionally into five values.
        spearman_row = pg.corr(sample['x'], sample['y'], method='spearman').values[0]
        _n, coef, _ci, p_value, _power = spearman_row
        assert -1 <= coef <= 1
        assert 0 <= p_value <= 1
| 150 | + |
def test_pairwise_correlation():
    """Compute pairwise correlations among the x, y and z columns."""
    for container in TEST_CLASSES:
        sample = _make_sample(container, design='correlation')
        table = pg.pairwise_corr(sample, columns=['x', 'y', 'z'])
        assert len(table) > 0
        for column in ('r', 'p_unc'):
            assert column in table.columns
| 159 | + |
def test_partial_correlation():
    """Compute the partial correlation of x and y with z as covariate."""
    for container in TEST_CLASSES:
        sample = _make_sample(container, design='correlation')
        table = pg.partial_corr(data=sample, x='x', y='y', covar='z')
        for column in ('r', 'p_val'):
            assert column in table.columns
| 167 | + |
| 168 | +# ------------------------------------------------------------------ |
| 169 | +# Non-parametric Tests |
| 170 | +# ------------------------------------------------------------------ |
| 171 | + |
def test_wilcoxon():
    """Run a Wilcoxon signed-rank test between the T1 and T2 scores."""
    for container in TEST_CLASSES:
        sample = _make_sample(container, design='within')
        first = sample[sample['time'] == 'T1']['score']
        second = sample[sample['time'] == 'T2']['score']
        # Trim both series to the shorter length before pairing them.
        n_pairs = min(len(first), len(second))
        stats = pg.wilcoxon(first[:n_pairs], second[:n_pairs])
        for key in ('W_val', 'p_val'):
            assert key in stats
| 182 | + |
def test_mann_whitney():
    """Run a Mann-Whitney U test comparing the scores of groups A and B."""
    for container in TEST_CLASSES:
        sample = _make_sample(container, design='between')
        scores_a = sample[sample['group'] == 'A']['score']
        scores_b = sample[sample['group'] == 'B']['score']
        stats = pg.mwu(scores_a, scores_b)
        for key in ('U_val', 'p_val'):
            assert key in stats
| 192 | + |
def test_kruskal():
    """Run a Kruskal-Wallis test of 'score' by 'group'."""
    for container in TEST_CLASSES:
        sample = _make_sample(container, design='between')
        table = pg.kruskal(data=sample, dv='score', between='group')
        for column in ('H', 'p_unc'):
            assert column in table.columns
| 200 | + |
| 201 | +# ------------------------------------------------------------------ |
| 202 | +# Regression Tests |
| 203 | +# ------------------------------------------------------------------ |
| 204 | + |
def test_linear_regression():
    """Regress y on the predictors x and z."""
    for container in TEST_CLASSES:
        sample = _make_sample(container, design='correlation')
        predictors = sample[['x', 'z']]
        table = pg.linear_regression(predictors, sample['y'])
        for column in ('coef', 'pval'):
            assert column in table.columns
| 212 | + |
def test_logistic_regression():
    """Fit a logistic regression of a median-split outcome on 'age'.

    The binary outcome is 1 where 'score' is strictly above its median and
    0 otherwise.
    """
    for cls in TEST_CLASSES:
        df = _make_sample(cls, design='between')
        # Build the outcome in one vectorised step and assign the whole
        # column at once. Writing single cells through chained indexing
        # (df['outcome'][i] = 1) does not update the frame under pandas
        # Copy-on-Write, and going through NumPy avoids relying on a
        # container-specific row iterator or median method.
        # NOTE(review): assumes a DataMatrix column converts with np.asarray
        # and accepts an equally long array on assignment -- confirm.
        scores = np.asarray(df['score'], dtype=float)
        df['outcome'] = (scores > np.median(scores)).astype(int)
        result = pg.logistic_regression(df[['age']], df['outcome'])
        assert 'coef' in result.columns
        assert 'pval' in result.columns
| 226 | + |
| 227 | +# ------------------------------------------------------------------ |
| 228 | +# Effect Size Tests |
| 229 | +# ------------------------------------------------------------------ |
| 230 | + |
def test_effect_size():
    """Compute Cohen's d and Hedges' g between groups A and B."""
    for container in TEST_CLASSES:
        sample = _make_sample(container, design='between')
        scores_a = sample[sample['group'] == 'A']['score']
        scores_b = sample[sample['group'] == 'B']['score']

        # 'cohen' first, then 'hedges'; each must come back as a plain number.
        for eftype in ('cohen', 'hedges'):
            effect = pg.compute_effsize(scores_a, scores_b, eftype=eftype)
            assert isinstance(effect, (int, float))
| 245 | + |
| 246 | +# ------------------------------------------------------------------ |
| 247 | +# Normality Tests |
| 248 | +# ------------------------------------------------------------------ |
| 249 | + |
def test_normality():
    """Run the normality test on the 'score' column."""
    for container in TEST_CLASSES:
        sample = _make_sample(container, design='between')
        table = pg.normality(sample['score'])
        for column in ('W', 'pval'):
            assert column in table.columns
| 257 | + |
def test_normality_grouped():
    """Run the normality test on 'score' separately for each 'group'."""
    for container in TEST_CLASSES:
        sample = _make_sample(container, design='between')
        table = pg.normality(data=sample, dv='score', group='group')
        # The between design has groups A and B, so one row per group.
        assert len(table) == 2
        for column in ('W', 'pval'):
            assert column in table.columns
| 266 | + |
| 267 | +# ------------------------------------------------------------------ |
| 268 | +# Post-hoc Tests |
| 269 | +# ------------------------------------------------------------------ |
| 270 | + |
def test_pairwise_ttests():
    """Run Bonferroni-corrected pairwise tests across the 'time' levels."""
    # pingouin deprecated ``pairwise_ttests`` in favour of ``pairwise_tests``.
    # Prefer the new name and fall back to the old one so the test runs on
    # releases that only ship one of them.
    pairwise = getattr(pg, 'pairwise_tests', None) or pg.pairwise_ttests
    for cls in TEST_CLASSES:
        df = _make_sample(cls, design='within')
        result = pairwise(data=df, dv='score', within='time',
                          subject='subject', padjust='bonf')
        assert 'T' in result.columns
        assert 'p_unc' in result.columns
        assert 'p_corr' in result.columns
0 commit comments