Skip to content

Commit f848303

Browse files
committed
Add feature/ML docs, new features, and deep-model pipeline config
1 parent c59c5f2 commit f848303

27 files changed

Lines changed: 1565 additions & 3 deletions

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ python .\analysis_pipeline\run_pipeline.py `
9292
- Stage 6: split-aware ML benchmarking.
9393

9494
See `docs/pipeline_methods.md` for explicit methodological details, including how fixed 6-second arithmetic windows are converted into epochs and optional sub-windows.
95+
For exact feature/model implementation details, see `docs/feature_ml_reference.md`.
9596

9697
## Notes for Manuscript Framing
9798

analysis_pipeline/config/pipeline_with_deep_models.yaml

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,17 @@ stages:
1212
stage1: false
1313
stage2: false
1414
stage3: false
15-
stage4: false
16-
stage5: false
15+
stage4: true
16+
stage5: true
1717
stage6: true
1818
stage6_confusions: true
1919

20+
stage_args:
21+
stage4: {}
22+
stage5:
23+
dropout_policy: "absolute"
24+
dropout_threshold: 35.0
25+
2026
stage6:
2127
run_tag_prefix: "deep_models"
2228
results_json_template: "analysis_pipeline/reports/ml_results_{scenario}_deep_models.json"
@@ -33,6 +39,17 @@ stage6:
3339
random_seed: 42
3440
class_scenarios:
3541
- name: "all_bins"
42+
- name: "omit_easiest"
43+
drop_labels: ["0.6-1.5"]
44+
- name: "three_level_merged"
45+
merge_map:
46+
"0.6-1.5": "low"
47+
"1.5-2.4": "low"
48+
"2.4-3.3": "mid"
49+
"3.3-4.2": "mid"
50+
"4.2-5.1": "high"
51+
"5.1-6.0": "high"
52+
"6.0-6.9": "high"
3653

3754
stage6_confusions:
3855
out_json_template: "analysis_pipeline/reports/confusion_highlights_{scenario}_deep_models.json"

analysis_pipeline/stage4_extract_features.py

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,27 @@ def _iqr(values: np.ndarray) -> float | None:
229229
return p75 - p25
230230

231231

232+
def _zero_crossing_count(signal_1d: np.ndarray) -> int | None:
233+
x = signal_1d[np.isfinite(signal_1d)]
234+
if x.size < 2:
235+
return None
236+
signs = np.sign(x)
237+
nonzero = signs != 0
238+
if int(np.sum(nonzero)) < 2:
239+
return 0
240+
nz = signs[nonzero]
241+
return int(np.sum(nz[1:] * nz[:-1] < 0))
242+
243+
244+
def _slope_sign_changes_count(signal_1d: np.ndarray) -> int | None:
245+
x = signal_1d[np.isfinite(signal_1d)]
246+
if x.size < 3:
247+
return None
248+
left = x[1:-1] - x[:-2]
249+
right = x[1:-1] - x[2:]
250+
return int(np.sum((left * right) > 0))
251+
252+
232253
def _normalize_ch_name(name: str) -> str:
233254
text = name.strip().upper()
234255
alias = {
@@ -370,11 +391,38 @@ def _compute_eeg_roi_features(
370391
roi_data = data[idx, :]
371392
var_vals = np.var(roi_data, axis=1)
372393
rms_vals = np.sqrt(np.mean(roi_data**2, axis=1))
394+
mav_vals = np.mean(np.abs(roi_data), axis=1)
395+
mean_power_vals = np.mean(roi_data**2, axis=1)
396+
median_power_vals = np.median(roi_data**2, axis=1)
373397
ll_vals = np.mean(np.abs(np.diff(roi_data, axis=1)), axis=1) if roi_data.shape[1] > 1 else np.array([])
398+
zc_counts: list[int] = []
399+
zc_rates: list[float] = []
400+
ssc_counts: list[int] = []
401+
ssc_rates: list[float] = []
402+
for ch in roi_data:
403+
zc = _zero_crossing_count(ch)
404+
if zc is not None:
405+
zc_counts.append(zc)
406+
denom = max(ch.size - 1, 1)
407+
zc_rates.append(float(zc) / float(denom))
408+
ssc = _slope_sign_changes_count(ch)
409+
if ssc is not None:
410+
ssc_counts.append(ssc)
411+
denom = max(ch.size - 2, 1)
412+
ssc_rates.append(float(ssc) / float(denom))
374413
features[f"eeg_var_{roi}"] = float(np.mean(var_vals))
375414
features[f"eeg_rms_{roi}"] = float(np.mean(rms_vals))
415+
features[f"eeg_mav_{roi}"] = float(np.mean(mav_vals))
416+
features[f"eeg_mean_power_{roi}"] = float(np.mean(mean_power_vals))
417+
features[f"eeg_median_power_{roi}"] = float(np.mean(median_power_vals))
376418
if ll_vals.size:
377419
features[f"eeg_line_length_{roi}"] = float(np.mean(ll_vals))
420+
if zc_counts:
421+
features[f"eeg_zero_crossings_{roi}"] = float(np.mean(zc_counts))
422+
features[f"eeg_zero_crossings_rate_{roi}"] = float(np.mean(zc_rates))
423+
if ssc_counts:
424+
features[f"eeg_ssc_{roi}"] = float(np.mean(ssc_counts))
425+
features[f"eeg_ssc_rate_{roi}"] = float(np.mean(ssc_rates))
378426

379427
hj_activity: list[float] = []
380428
hj_mobility: list[float] = []
@@ -500,7 +548,13 @@ def _compute_ecg_peak_features(times: np.ndarray, peak_sig: np.ndarray) -> dict[
500548
p75 = np.percentile(rr_ms, 75.0)
501549
p25 = np.percentile(rr_ms, 25.0)
502550
out["ecg_rr_iqr_ms"] = float(p75 - p25)
503-
out["ecg_pnn50_pct"] = float(100.0 * np.mean(np.abs(rr_diff_ms) > 50.0)) if rr_diff_ms.size else None
551+
if rr_diff_ms.size:
552+
nn50_count = int(np.sum(np.abs(rr_diff_ms) > 50.0))
553+
out["ecg_nn50_count"] = nn50_count
554+
out["ecg_pnn50_pct"] = float(100.0 * nn50_count / rr_diff_ms.size)
555+
else:
556+
out["ecg_nn50_count"] = None
557+
out["ecg_pnn50_pct"] = None
504558
out["ecg_rr_cv"] = float(np.std(rr_ms) / np.mean(rr_ms)) if np.mean(rr_ms) > 0 else None
505559
out["ecg_quality_flag"] = "ok" if int(peaks.size) >= 3 else "insufficient_beats"
506560
return out
@@ -560,6 +614,10 @@ def _compute_pupil_features(
560614
early_ref = float(valid_pupil[0])
561615
peak = float(np.max(valid_pupil))
562616
out["pupil_peak_dilation"] = peak - early_ref
617+
peak_idx = int(np.argmax(valid_pupil))
618+
peak_time_s = float(rel_t[peak_idx])
619+
out["pupil_peak_time_s"] = peak_time_s
620+
out["pupil_tepr_latency_s"] = max(0.0, peak_time_s - early_window)
563621

564622
dt = np.diff(valid_times)
565623
good_dt = dt > 0
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Draft Manuscript Sections: Methods and Results
2+
3+
> This is a draft scaffold generated from current pipeline outputs. Edit for journal style and narrative flow.
4+
5+
## Methods (Draft)
6+
7+
### Dataset and processing overview
8+
We analyzed a BIDS-formatted arithmetic multimodal dataset (EEG, ECG, and pupil), and processed it with a staged pipeline that included trial-table construction, QC, preprocessing, epoching, feature extraction, feature-table fusion, and split-aware machine-learning evaluation.
9+
10+
### Feature and table construction
11+
Unimodal feature tables were built independently for EEG, ECG, and pupil, then aligned into a fused multimodal table. All model training used leak-safe fold-local preprocessing (imputation, clipping, robust scaling, variance filtering, optional feature selection).
12+
13+
### Classification design
14+
Datasets evaluated: `eeg, ecg, pupil, fused`.
15+
Validation protocols: `loso, group_holdout, within_participant`.
16+
Model families: `logreg, knn, svm, gaussian_nb, decision_tree, mlp, rf`.
17+
Feature selectors: `none`.
18+
Primary metrics were balanced accuracy, macro-F1, weighted-F1, and accuracy, with confusion matrices aggregated for top-ranked pipelines by dataset and protocol.
19+
20+
### Reproducibility
21+
All run-level outputs and configuration metadata were stored in structured JSON artifacts; this draft references `analysis_pipeline\reports\ml_results_all_bins.json` as the primary result source.
22+
23+
## Results (Draft)
24+
25+
The analyzed run (`analysis_pipeline\reports\ml_results_all_bins.json`) produced `392` evaluations and `84` aggregate pipeline rows for scenario `all_bins`.
26+
27+
### Best pipeline by modality and protocol
28+
| dataset | protocol | best_model | best_feature_selector | best_pipeline | balanced_accuracy_mean | macro_f1_mean |
29+
|---|---|---|---|---|---|---|
30+
| ecg | group_holdout | rf | none | rf+none | 0.1947 | 0.1863 |
31+
| ecg | loso | rf | none | rf+none | 0.2014 | 0.1801 |
32+
| ecg | within_participant | rf | none | rf+none | 0.2143 | 0.1803 |
33+
| eeg | group_holdout | knn | none | knn+none | 0.1914 | 0.1823 |
34+
| eeg | loso | mlp | none | mlp+none | 0.2484 | 0.1977 |
35+
| eeg | within_participant | mlp | none | mlp+none | 0.3071 | 0.2774 |
36+
| fused | group_holdout | rf | none | rf+none | 0.2173 | 0.1907 |
37+
| fused | loso | rf | none | rf+none | 0.2063 | 0.1295 |
38+
| fused | within_participant | logreg | none | logreg+none | 0.3000 | 0.2571 |
39+
| pupil | group_holdout | decision_tree | none | decision_tree+none | 0.2260 | 0.2154 |
40+
| pupil | loso | decision_tree | none | decision_tree+none | 0.2004 | 0.1824 |
41+
| pupil | within_participant | rf | none | rf+none | 0.3571 | 0.3001 |
42+
43+
### Modality-level top-performing pipelines
44+
| dataset | top_pipeline | top_protocol | top_balanced_accuracy | top_macro_f1 |
45+
|---|---|---|---|---|
46+
| ecg | rf+none | within_participant | 0.2143 | 0.1803 |
47+
| eeg | mlp+none | within_participant | 0.3071 | 0.2774 |
48+
| fused | logreg+none | within_participant | 0.3000 | 0.2571 |
49+
| pupil | rf+none | within_participant | 0.3571 | 0.3001 |
50+
51+
### Interpretation notes
52+
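In this run, the fused multimodal track did not outperform the best unimodal track in any protocol: pupil led under `group_holdout` (balanced accuracy 0.2260 vs. 0.2173 for fused) and `within_participant` (0.3571 vs. 0.3000), and EEG led under `loso` (0.2484 vs. 0.2063). These results therefore do not yet support a fusion-first baseline for main analyses; the fused track should be reported alongside the unimodal tracks, which are retained for mechanistic interpretation.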
53+
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
# Model and Modality Classification Output Summary
2+
3+
## Inputs
4+
- Results JSON: `analysis_pipeline\reports\ml_results_all_bins.json`
5+
6+
## Run Snapshot
7+
- Scenario: `all_bins`
8+
- Datasets: `eeg, ecg, pupil, fused`
9+
- Protocols: `loso, group_holdout, within_participant`
10+
- Models: `logreg, knn, svm, gaussian_nb, decision_tree, mlp, rf`
11+
- Feature selectors: `none`
12+
- Evaluations: `392`
13+
- Aggregate rows: `84`
14+
15+
## Best Pipeline by Modality and Protocol
16+
| dataset | protocol | best_model | best_feature_selector | best_pipeline | balanced_accuracy_mean | macro_f1_mean | n_evaluations |
17+
|---|---|---|---|---|---|---|---|
18+
| ecg | group_holdout | rf | none | rf+none | 0.1947 | 0.1863 | 2 |
19+
| ecg | loso | rf | none | rf+none | 0.2014 | 0.1801 | 2 |
20+
| ecg | within_participant | rf | none | rf+none | 0.2143 | 0.1803 | 10 |
21+
| eeg | group_holdout | knn | none | knn+none | 0.1914 | 0.1823 | 2 |
22+
| eeg | loso | mlp | none | mlp+none | 0.2484 | 0.1977 | 2 |
23+
| eeg | within_participant | mlp | none | mlp+none | 0.3071 | 0.2774 | 10 |
24+
| fused | group_holdout | rf | none | rf+none | 0.2173 | 0.1907 | 2 |
25+
| fused | loso | rf | none | rf+none | 0.2063 | 0.1295 | 2 |
26+
| fused | within_participant | logreg | none | logreg+none | 0.3000 | 0.2571 | 10 |
27+
| pupil | group_holdout | decision_tree | none | decision_tree+none | 0.2260 | 0.2154 | 2 |
28+
| pupil | loso | decision_tree | none | decision_tree+none | 0.2004 | 0.1824 | 2 |
29+
| pupil | within_participant | rf | none | rf+none | 0.3571 | 0.3001 | 10 |
30+
31+
## Protocol-Level Summary by Modality
32+
| dataset | protocol | n_models | n_pipelines | mean_balanced_accuracy | max_balanced_accuracy | mean_macro_f1 |
33+
|---|---|---|---|---|---|---|
34+
| ecg | group_holdout | 7 | 7 | 0.1616 | 0.1947 | 0.1474 |
35+
| ecg | loso | 7 | 7 | 0.1655 | 0.2014 | 0.1252 |
36+
| ecg | within_participant | 7 | 7 | 0.1929 | 0.2143 | 0.1519 |
37+
| eeg | group_holdout | 7 | 7 | 0.1682 | 0.1914 | 0.1458 |
38+
| eeg | loso | 7 | 7 | 0.1928 | 0.2484 | 0.1313 |
39+
| eeg | within_participant | 7 | 7 | 0.2684 | 0.3071 | 0.2333 |
40+
| fused | group_holdout | 7 | 7 | 0.1687 | 0.2173 | 0.1349 |
41+
| fused | loso | 7 | 7 | 0.1556 | 0.2063 | 0.1100 |
42+
| fused | within_participant | 7 | 7 | 0.2173 | 0.3000 | 0.1713 |
43+
| pupil | group_holdout | 7 | 7 | 0.1753 | 0.2260 | 0.1336 |
44+
| pupil | loso | 7 | 7 | 0.1601 | 0.2004 | 0.0920 |
45+
| pupil | within_participant | 7 | 7 | 0.2796 | 0.3571 | 0.2148 |
46+
47+
## Model Performance by Modality
48+
| dataset | model | n_protocols | n_pipelines | mean_balanced_accuracy | max_balanced_accuracy | mean_macro_f1 | top_pipeline | top_pipeline_selector | top_pipeline_protocol |
49+
|---|---|---|---|---|---|---|---|---|---|
50+
| ecg | rf | 3 | 1 | 0.2035 | 0.2143 | 0.1822 | rf+none | none | within_participant |
51+
| ecg | knn | 3 | 1 | 0.1967 | 0.2143 | 0.1741 | knn+none | none | within_participant |
52+
| ecg | gaussian_nb | 3 | 1 | 0.1730 | 0.2143 | 0.1249 | gaussian_nb+none | none | within_participant |
53+
| ecg | svm | 3 | 1 | 0.1681 | 0.1929 | 0.1278 | svm+none | none | within_participant |
54+
| ecg | logreg | 3 | 1 | 0.1675 | 0.2000 | 0.1303 | logreg+none | none | within_participant |
55+
| ecg | mlp | 3 | 1 | 0.1625 | 0.1675 | 0.1344 | mlp+none | none | loso |
56+
| ecg | decision_tree | 3 | 1 | 0.1420 | 0.1500 | 0.1167 | decision_tree+none | none | within_participant |
57+
| eeg | mlp | 3 | 1 | 0.2457 | 0.3071 | 0.2142 | mlp+none | none | within_participant |
58+
| eeg | rf | 3 | 1 | 0.2163 | 0.2786 | 0.1614 | rf+none | none | within_participant |
59+
| eeg | svm | 3 | 1 | 0.2158 | 0.2857 | 0.1781 | svm+none | none | within_participant |
60+
| eeg | knn | 3 | 1 | 0.2110 | 0.2571 | 0.1805 | knn+none | none | within_participant |
61+
| eeg | gaussian_nb | 3 | 1 | 0.2025 | 0.2500 | 0.1458 | gaussian_nb+none | none | within_participant |
62+
| eeg | logreg | 3 | 1 | 0.1973 | 0.2714 | 0.1544 | logreg+none | none | within_participant |
63+
| eeg | decision_tree | 3 | 1 | 0.1801 | 0.2286 | 0.1567 | decision_tree+none | none | within_participant |
64+
| fused | rf | 3 | 1 | 0.2103 | 0.2173 | 0.1661 | rf+none | none | group_holdout |
65+
| fused | decision_tree | 3 | 1 | 0.1946 | 0.2571 | 0.1813 | decision_tree+none | none | within_participant |
66+
| fused | mlp | 3 | 1 | 0.1845 | 0.2071 | 0.1618 | mlp+none | none | within_participant |
67+
| fused | logreg | 3 | 1 | 0.1824 | 0.3000 | 0.1166 | logreg+none | none | within_participant |
68+
| fused | svm | 3 | 1 | 0.1770 | 0.2143 | 0.1357 | svm+none | none | within_participant |
69+
| fused | knn | 3 | 1 | 0.1675 | 0.1774 | 0.1552 | knn+none | none | group_holdout |
70+
| fused | gaussian_nb | 3 | 1 | 0.1478 | 0.1714 | 0.0542 | gaussian_nb+none | none | within_participant |
71+
| pupil | decision_tree | 3 | 1 | 0.2445 | 0.3071 | 0.2198 | decision_tree+none | none | within_participant |
72+
| pupil | rf | 3 | 1 | 0.2385 | 0.3571 | 0.1845 | rf+none | none | within_participant |
73+
| pupil | mlp | 3 | 1 | 0.2324 | 0.3071 | 0.1656 | mlp+none | none | within_participant |
74+
| pupil | svm | 3 | 1 | 0.2306 | 0.3214 | 0.1611 | svm+none | none | within_participant |
75+
| pupil | knn | 3 | 1 | 0.1991 | 0.2929 | 0.1632 | knn+none | none | within_participant |
76+
| pupil | gaussian_nb | 3 | 1 | 0.1502 | 0.1786 | 0.0550 | gaussian_nb+none | none | within_participant |
77+
| pupil | logreg | 3 | 1 | 0.1398 | 0.1929 | 0.0783 | logreg+none | none | within_participant |
78+
79+
## Top Pipelines per Modality and Protocol
80+
| dataset | protocol | rank | model | feature_selector | pipeline_id | balanced_accuracy_mean | macro_f1_mean |
81+
|---|---|---|---|---|---|---|---|
82+
| ecg | group_holdout | 1 | rf | none | rf+none | 0.1947 | 0.1863 |
83+
| ecg | group_holdout | 2 | svm | none | svm+none | 0.1805 | 0.1621 |
84+
| ecg | group_holdout | 3 | knn | none | knn+none | 0.1790 | 0.1740 |
85+
| ecg | loso | 1 | rf | none | rf+none | 0.2014 | 0.1801 |
86+
| ecg | loso | 2 | knn | none | knn+none | 0.1969 | 0.1734 |
87+
| ecg | loso | 3 | logreg | none | logreg+none | 0.1752 | 0.1180 |
88+
| ecg | within_participant | 1 | rf | none | rf+none | 0.2143 | 0.1803 |
89+
| ecg | within_participant | 2 | knn | none | knn+none | 0.2143 | 0.1749 |
90+
| ecg | within_participant | 3 | gaussian_nb | none | gaussian_nb+none | 0.2143 | 0.1564 |
91+
| eeg | group_holdout | 1 | knn | none | knn+none | 0.1914 | 0.1823 |
92+
| eeg | group_holdout | 2 | mlp | none | mlp+none | 0.1815 | 0.1674 |
93+
| eeg | group_holdout | 3 | svm | none | svm+none | 0.1768 | 0.1455 |
94+
| eeg | loso | 1 | mlp | none | mlp+none | 0.2484 | 0.1977 |
95+
| eeg | loso | 2 | rf | none | rf+none | 0.2143 | 0.1041 |
96+
| eeg | loso | 3 | gaussian_nb | none | gaussian_nb+none | 0.1983 | 0.1118 |
97+
| eeg | within_participant | 1 | mlp | none | mlp+none | 0.3071 | 0.2774 |
98+
| eeg | within_participant | 2 | svm | none | svm+none | 0.2857 | 0.2500 |
99+
| eeg | within_participant | 3 | rf | none | rf+none | 0.2786 | 0.2429 |
100+
| fused | group_holdout | 1 | rf | none | rf+none | 0.2173 | 0.1907 |
101+
| fused | group_holdout | 2 | knn | none | knn+none | 0.1774 | 0.1727 |
102+
| fused | group_holdout | 3 | svm | none | svm+none | 0.1738 | 0.1528 |
103+
| fused | loso | 1 | rf | none | rf+none | 0.2063 | 0.1295 |
104+
| fused | loso | 2 | mlp | none | mlp+none | 0.1746 | 0.1539 |
105+
| fused | loso | 3 | knn | none | knn+none | 0.1607 | 0.1504 |
106+
| fused | within_participant | 1 | logreg | none | logreg+none | 0.3000 | 0.2571 |
107+
| fused | within_participant | 2 | decision_tree | none | decision_tree+none | 0.2571 | 0.2369 |
108+
| fused | within_participant | 3 | svm | none | svm+none | 0.2143 | 0.1299 |
109+
| pupil | group_holdout | 1 | decision_tree | none | decision_tree+none | 0.2260 | 0.2154 |
110+
| pupil | group_holdout | 2 | mlp | none | mlp+none | 0.2075 | 0.1444 |
111+
| pupil | group_holdout | 3 | svm | none | svm+none | 0.1957 | 0.1578 |
112+
| pupil | loso | 1 | decision_tree | none | decision_tree+none | 0.2004 | 0.1824 |
113+
| pupil | loso | 2 | mlp | none | mlp+none | 0.1825 | 0.1125 |
114+
| pupil | loso | 3 | svm | none | svm+none | 0.1746 | 0.0938 |
115+
| pupil | within_participant | 1 | rf | none | rf+none | 0.3571 | 0.3001 |
116+
| pupil | within_participant | 2 | svm | none | svm+none | 0.3214 | 0.2317 |
117+
| pupil | within_participant | 3 | mlp | none | mlp+none | 0.3071 | 0.2399 |
118+
119+
## Exported Tables
120+
- All pipelines: `docs\tables\all_bins_publication\all_pipeline_aggregates.csv`
121+
- Best by track: `docs\tables\all_bins_publication\best_pipeline_by_modality_protocol.csv`
122+
- Protocol summary: `docs\tables\all_bins_publication\protocol_summary_by_modality.csv`
123+
- Model-by-modality summary: `docs\tables\all_bins_publication\model_summary_by_modality.csv`
124+
- Top pipelines: `docs\tables\all_bins_publication\top_pipelines_by_track.csv`
125+

0 commit comments

Comments
 (0)