Skip to content

Commit a26b91d

Browse files
committed
Add reviewer variability statistics reporting
1 parent 2afba78 commit a26b91d

2 files changed

Lines changed: 254 additions & 91 deletions

File tree

analysis_pipeline/stage6_build_publication_report.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,18 @@
33
import argparse
44
import glob
55
import json
6+
import math
67
from datetime import datetime, timezone
78
from pathlib import Path
89
from typing import Any
910

1011
import pandas as pd
1112

13+
try:
14+
from scipy.stats import t as student_t
15+
except ImportError: # pragma: no cover - report still works with normal approximation
16+
student_t = None
17+
1218

1319
def _analysis_root() -> Path:
1420
return Path(__file__).resolve().parent
@@ -179,6 +185,57 @@ def _as_float(value: Any) -> float | None:
179185
return None
180186

181187

188+
def _finite_series(values: pd.Series) -> pd.Series:
189+
numeric = pd.to_numeric(values, errors="coerce")
190+
return numeric.dropna()
191+
192+
193+
def _ci_multiplier(n: int) -> float:
194+
if n <= 1:
195+
return 0.0
196+
if student_t is not None:
197+
return float(student_t.ppf(0.975, df=n - 1))
198+
return 1.96
199+
200+
201+
def _describe_metric(values: pd.Series) -> dict[str, float | int | None]:
    """Summarise a metric column as count, mean, spread and a 95% interval.

    The input is first passed through ``_finite_series``; every statistic is
    computed on what that helper returns.

    Args:
        values: Series of raw metric values.

    Returns:
        A dict with keys ``n``, ``mean``, ``sd``, ``se``, ``ci95_low``,
        ``ci95_high``, ``min`` and ``max``. With no usable values, ``n`` is
        ``0`` and every other entry is ``None``. With exactly one value, the
        spread terms are ``0.0`` and both interval bounds equal the mean.
        Otherwise ``sd`` is the sample standard deviation (``ddof=1``),
        ``se`` is ``sd / sqrt(n)`` and the interval is the mean plus/minus
        ``_ci_multiplier(n) * se``.
    """
    observations = _finite_series(values)
    count = int(observations.shape[0])

    if count == 0:
        # Nothing to summarise: keep the full key set so callers can index
        # the result uniformly.
        summary: dict[str, float | int | None] = dict.fromkeys(
            ("n", "mean", "sd", "se", "ci95_low", "ci95_high", "min", "max")
        )
        summary["n"] = 0
        return summary

    center = float(observations.mean())
    if count > 1:
        spread = float(observations.std(ddof=1))
        std_error = float(spread / math.sqrt(count))
        half_width = _ci_multiplier(count) * std_error
        lower = center - half_width
        upper = center + half_width
    else:
        # A single observation has no sample spread; report a degenerate
        # interval collapsed onto the mean.
        spread = 0.0
        std_error = 0.0
        lower = center
        upper = center

    return {
        "n": count,
        "mean": center,
        "sd": spread,
        "se": std_error,
        "ci95_low": float(lower),
        "ci95_high": float(upper),
        "min": float(observations.min()),
        "max": float(observations.max()),
    }
237+
238+
182239
def _collect_stage6_result_paths(explicit: list[str] | None, glob_pattern: str | None) -> list[Path]:
183240
paths: list[Path] = []
184241
for item in explicit or []:
@@ -303,6 +360,42 @@ def _build_global_best_table(best_df: pd.DataFrame) -> pd.DataFrame:
303360
return pd.DataFrame(rows).sort_values(["dataset", "protocol"]).reset_index(drop=True)
304361

305362

363+
def _build_modality_protocol_variability_table(best_df: pd.DataFrame) -> pd.DataFrame:
364+
if best_df.empty:
365+
return pd.DataFrame()
366+
required = {"dataset", "protocol", "scenario", "balanced_accuracy_mean", "macro_f1_mean"}
367+
if not required.issubset(set(best_df.columns)):
368+
return pd.DataFrame()
369+
370+
rows: list[dict[str, Any]] = []
371+
grouped = best_df.groupby(["dataset", "protocol"], sort=True)
372+
for (dataset, protocol), group in grouped:
373+
ba = _describe_metric(group["balanced_accuracy_mean"])
374+
f1 = _describe_metric(group["macro_f1_mean"])
375+
scenario_names = sorted(str(item) for item in group["scenario"].dropna().unique())
376+
rows.append(
377+
{
378+
"dataset": str(dataset),
379+
"protocol": str(protocol),
380+
"n_scenarios": int(ba["n"]),
381+
"mean_balanced_accuracy": ba["mean"],
382+
"sd_balanced_accuracy": ba["sd"],
383+
"se_balanced_accuracy": ba["se"],
384+
"ci95_low_balanced_accuracy": ba["ci95_low"],
385+
"ci95_high_balanced_accuracy": ba["ci95_high"],
386+
"min_balanced_accuracy": ba["min"],
387+
"max_balanced_accuracy": ba["max"],
388+
"mean_macro_f1": f1["mean"],
389+
"sd_macro_f1": f1["sd"],
390+
"se_macro_f1": f1["se"],
391+
"ci95_low_macro_f1": f1["ci95_low"],
392+
"ci95_high_macro_f1": f1["ci95_high"],
393+
"scenarios": "; ".join(scenario_names),
394+
}
395+
)
396+
return pd.DataFrame(rows).sort_values(["dataset", "protocol"]).reset_index(drop=True)
397+
398+
306399
def _participants_with_drops(
307400
epoch_payload: dict[str, Any] | None,
308401
) -> tuple[list[str], list[str], list[str], pd.DataFrame]:
@@ -426,6 +519,7 @@ def _markdown_from_components(
426519
stage6_scenarios: pd.DataFrame,
427520
stage6_best: pd.DataFrame,
428521
stage6_global_best: pd.DataFrame,
522+
stage6_variability: pd.DataFrame,
429523
dropped_windows_table: pd.DataFrame,
430524
) -> str:
431525
lines: list[str] = []
@@ -640,6 +734,12 @@ def _markdown_from_components(
640734
if not stage6_global_best.empty:
641735
lines.append("### Global Best Pipeline per Dataset and Protocol")
642736
lines.append(_to_markdown_table(stage6_global_best))
737+
if not stage6_variability.empty:
738+
lines.append("### Modality/Protocol Variability Across Class Scenarios")
739+
lines.append(
740+
"_Values summarise the best-pipeline balanced-accuracy and macro-F1 means across loaded class-scenario result files._"
741+
)
742+
lines.append(_to_markdown_table(stage6_variability, decimals=4))
643743
lines.append("")
644744

645745
stage1_strict_state = _stage1_strict_mode(run_manifest_payload)
@@ -743,6 +843,7 @@ def main() -> None:
743843

744844
stage6_scenarios, stage6_best, _stage6_agg = _build_stage6_frames(result_paths)
745845
stage6_global_best = _build_global_best_table(stage6_best)
846+
stage6_variability = _build_modality_protocol_variability_table(stage6_best)
746847

747848
eeg_drop_subjects, ecg_drop_subjects, pupil_drop_subjects, dropped_windows_table = _participants_with_drops(
748849
epoch_payload
@@ -765,6 +866,7 @@ def main() -> None:
765866
stage6_scenarios=stage6_scenarios,
766867
stage6_best=stage6_best,
767868
stage6_global_best=stage6_global_best,
869+
stage6_variability=stage6_variability,
768870
dropped_windows_table=dropped_windows_table,
769871
)
770872

@@ -786,6 +888,7 @@ def main() -> None:
786888
"scenario_rows": stage6_scenarios.to_dict(orient="records"),
787889
"best_rows": stage6_best.to_dict(orient="records"),
788890
"global_best_by_dataset_protocol": stage6_global_best.to_dict(orient="records"),
891+
"modality_protocol_variability": stage6_variability.to_dict(orient="records"),
789892
},
790893
"segmentation_dropped_windows": {
791894
"subjects_with_eeg_drops": eeg_drop_subjects,

0 commit comments

Comments
 (0)