LMBooth
diff --git a/README.md b/README.md
Lines changed: 1 addition & 0 deletions
diff --git a/analysis_pipeline/config/pipeline_with_deep_models.yaml b/analysis_pipeline/config/pipeline_with_deep_models.yaml
Lines changed: 19 additions & 2 deletions
diff --git a/analysis_pipeline/stage4_extract_features.py b/analysis_pipeline/stage4_extract_features.py
Lines changed: 59 additions & 1 deletion
diff --git a/docs/autodraft_methods_results_all_bins.md b/docs/autodraft_methods_results_all_bins.md
Lines changed: 53 additions & 0 deletions
diff --git a/docs/autodraft_publication_summary_all_bins.md b/docs/autodraft_publication_summary_all_bins.md
Lines changed: 125 additions & 0 deletions
@@ -92,6 +92,7 @@ python .\analysis_pipeline\run_pipeline.py `
 - Stage 6: split-aware ML benchmarking.
 
 See `docs/pipeline_methods.md` for explicit methodological details, including how fixed 6-second arithmetic windows are converted into epochs and optional sub-windows.
+For exact feature/model implementation details, see `docs/feature_ml_reference.md`.
 
 ## Notes for Manuscript Framing
 
 
@@ -12,11 +12,17 @@ stages:
   stage1: false
   stage2: false
   stage3: false
-  stage4: false
-  stage5: false
+  stage4: true
+  stage5: true
   stage6: true
   stage6_confusions: true
 
+stage_args:
+  stage4: {}
+  stage5:
+    dropout_policy: "absolute"
+    dropout_threshold: 35.0
+
 stage6:
   run_tag_prefix: "deep_models"
   results_json_template: "analysis_pipeline/reports/ml_results_{scenario}_deep_models.json"
@@ -33,6 +39,17 @@ stage6:
     random_seed: 42
   class_scenarios:
     - name: "all_bins"
+    - name: "omit_easiest"
+      drop_labels: ["0.6-1.5"]
+    - name: "three_level_merged"
+      merge_map:
+        "0.6-1.5": "low"
+        "1.5-2.4": "low"
+        "2.4-3.3": "mid"
+        "3.3-4.2": "mid"
+        "4.2-5.1": "high"
+        "5.1-6.0": "high"
+        "6.0-6.9": "high"
 
 stage6_confusions:
   out_json_template: "analysis_pipeline/reports/confusion_highlights_{scenario}_deep_models.json"
 
@@ -229,6 +229,27 @@ def _iqr(values: np.ndarray) -> float | None:
     return p75 - p25
 
 
+def _zero_crossing_count(signal_1d: np.ndarray) -> int | None:  # Count sign flips between consecutive non-zero finite samples.
+    x = signal_1d[np.isfinite(signal_1d)]  # drop NaN/inf samples before counting
+    if x.size < 2:
+        return None  # fewer than two finite samples: the count is undefined
+    signs = np.sign(x)
+    nonzero = signs != 0
+    if int(np.sum(nonzero)) < 2:
+        return 0  # at most one non-zero sample, so no crossing is possible
+    nz = signs[nonzero]  # exact zeros are skipped, so a +, 0, - run counts as a single crossing
+    return int(np.sum(nz[1:] * nz[:-1] < 0))  # a negative product means adjacent signs differ
+
+
+def _slope_sign_changes_count(signal_1d: np.ndarray) -> int | None:  # Count interior samples that are strict local peaks or troughs.
+    x = signal_1d[np.isfinite(signal_1d)]  # drop NaN/inf samples; the remaining samples are treated as adjacent
+    if x.size < 3:
+        return None  # needs at least one sample with both a predecessor and a successor
+    left = x[1:-1] - x[:-2]  # each interior sample minus its predecessor
+    right = x[1:-1] - x[2:]  # each interior sample minus its successor
+    return int(np.sum((left * right) > 0))  # same-sign differences mark a strict extremum; plateaus (product 0) are not counted
+
+
 def _normalize_ch_name(name: str) -> str:
     text = name.strip().upper()
     alias = {
@@ -370,11 +391,38 @@ def _compute_eeg_roi_features(
         roi_data = data[idx, :]
         var_vals = np.var(roi_data, axis=1)
         rms_vals = np.sqrt(np.mean(roi_data**2, axis=1))
+        mav_vals = np.mean(np.abs(roi_data), axis=1)
+        mean_power_vals = np.mean(roi_data**2, axis=1)
+        median_power_vals = np.median(roi_data**2, axis=1)
         ll_vals = np.mean(np.abs(np.diff(roi_data, axis=1)), axis=1) if roi_data.shape[1] > 1 else np.array([])
+        zc_counts: list[int] = []
+        zc_rates: list[float] = []
+        ssc_counts: list[int] = []
+        ssc_rates: list[float] = []
+        for ch in roi_data:
+            zc = _zero_crossing_count(ch)
+            if zc is not None:
+                zc_counts.append(zc)
+                denom = max(ch.size - 1, 1)
+                zc_rates.append(float(zc) / float(denom))
+            ssc = _slope_sign_changes_count(ch)
+            if ssc is not None:
+                ssc_counts.append(ssc)
+                denom = max(ch.size - 2, 1)
+                ssc_rates.append(float(ssc) / float(denom))
         features[f"eeg_var_{roi}"] = float(np.mean(var_vals))
         features[f"eeg_rms_{roi}"] = float(np.mean(rms_vals))
+        features[f"eeg_mav_{roi}"] = float(np.mean(mav_vals))
+        features[f"eeg_mean_power_{roi}"] = float(np.mean(mean_power_vals))
+        features[f"eeg_median_power_{roi}"] = float(np.mean(median_power_vals))
         if ll_vals.size:
             features[f"eeg_line_length_{roi}"] = float(np.mean(ll_vals))
+        if zc_counts:
+            features[f"eeg_zero_crossings_{roi}"] = float(np.mean(zc_counts))
+            features[f"eeg_zero_crossings_rate_{roi}"] = float(np.mean(zc_rates))
+        if ssc_counts:
+            features[f"eeg_ssc_{roi}"] = float(np.mean(ssc_counts))
+            features[f"eeg_ssc_rate_{roi}"] = float(np.mean(ssc_rates))
 
         hj_activity: list[float] = []
         hj_mobility: list[float] = []
@@ -500,7 +548,13 @@ def _compute_ecg_peak_features(times: np.ndarray, peak_sig: np.ndarray) -> dict[
     p75 = np.percentile(rr_ms, 75.0)
     p25 = np.percentile(rr_ms, 25.0)
     out["ecg_rr_iqr_ms"] = float(p75 - p25)
-    out["ecg_pnn50_pct"] = float(100.0 * np.mean(np.abs(rr_diff_ms) > 50.0)) if rr_diff_ms.size else None
+    if rr_diff_ms.size:
+        nn50_count = int(np.sum(np.abs(rr_diff_ms) > 50.0))
+        out["ecg_nn50_count"] = nn50_count
+        out["ecg_pnn50_pct"] = float(100.0 * nn50_count / rr_diff_ms.size)
+    else:
+        out["ecg_nn50_count"] = None
+        out["ecg_pnn50_pct"] = None
     out["ecg_rr_cv"] = float(np.std(rr_ms) / np.mean(rr_ms)) if np.mean(rr_ms) > 0 else None
     out["ecg_quality_flag"] = "ok" if int(peaks.size) >= 3 else "insufficient_beats"
     return out
@@ -560,6 +614,10 @@ def _compute_pupil_features(
         early_ref = float(valid_pupil[0])
     peak = float(np.max(valid_pupil))
     out["pupil_peak_dilation"] = peak - early_ref
+    peak_idx = int(np.argmax(valid_pupil))
+    peak_time_s = float(rel_t[peak_idx])
+    out["pupil_peak_time_s"] = peak_time_s
+    out["pupil_tepr_latency_s"] = max(0.0, peak_time_s - early_window)
 
     dt = np.diff(valid_times)
     good_dt = dt > 0
 
@@ -0,0 +1,53 @@
+# Draft Manuscript Sections: Methods and Results
+
+> This is a draft scaffold generated from current pipeline outputs. Edit for journal style and narrative flow.
+
+## Methods (Draft)
+
+### Dataset and processing overview
+We analyzed a BIDS-formatted arithmetic multimodal dataset (EEG, ECG, and pupil), and processed it with a staged pipeline that included trial-table construction, QC, preprocessing, epoching, feature extraction, feature-table fusion, and split-aware machine-learning evaluation.
+
+### Feature and table construction
+Unimodal feature tables were built independently for EEG, ECG, and pupil, then aligned into a fused multimodal table. All model training used leak-safe fold-local preprocessing (imputation, clipping, robust scaling, variance filtering, optional feature selection).
+
+### Classification design
+Datasets evaluated: `eeg, ecg, pupil, fused`.
+Validation protocols: `loso, group_holdout, within_participant`.
+Model families: `logreg, knn, svm, gaussian_nb, decision_tree, mlp, rf`.
+Feature selectors: `none`.
+Primary metrics were balanced accuracy, macro-F1, weighted-F1, and accuracy, with confusion matrices aggregated for top-ranked pipelines by dataset and protocol.
+
+### Reproducibility
+All run-level outputs and configuration metadata were stored in structured JSON artifacts; this draft references `analysis_pipeline\reports\ml_results_all_bins.json` as the primary result source.
+
+## Results (Draft)
+
+The analyzed run (`analysis_pipeline\reports\ml_results_all_bins.json`) produced `392` evaluations and `84` aggregate pipeline rows for scenario `all_bins`.
+
+### Best pipeline by modality and protocol
+| dataset | protocol | best_model | best_feature_selector | best_pipeline | balanced_accuracy_mean | macro_f1_mean |
+|---|---|---|---|---|---|---|
+| ecg | group_holdout | rf | none | rf+none | 0.1947 | 0.1863 |
+| ecg | loso | rf | none | rf+none | 0.2014 | 0.1801 |
+| ecg | within_participant | rf | none | rf+none | 0.2143 | 0.1803 |
+| eeg | group_holdout | knn | none | knn+none | 0.1914 | 0.1823 |
+| eeg | loso | mlp | none | mlp+none | 0.2484 | 0.1977 |
+| eeg | within_participant | mlp | none | mlp+none | 0.3071 | 0.2774 |
+| fused | group_holdout | rf | none | rf+none | 0.2173 | 0.1907 |
+| fused | loso | rf | none | rf+none | 0.2063 | 0.1295 |
+| fused | within_participant | logreg | none | logreg+none | 0.3000 | 0.2571 |
+| pupil | group_holdout | decision_tree | none | decision_tree+none | 0.2260 | 0.2154 |
+| pupil | loso | decision_tree | none | decision_tree+none | 0.2004 | 0.1824 |
+| pupil | within_participant | rf | none | rf+none | 0.3571 | 0.3001 |
+
+### Modality-level top-performing pipelines
+| dataset | top_pipeline | top_protocol | top_balanced_accuracy | top_macro_f1 |
+|---|---|---|---|---|
+| ecg | rf+none | within_participant | 0.2143 | 0.1803 |
+| eeg | mlp+none | within_participant | 0.3071 | 0.2774 |
+| fused | logreg+none | within_participant | 0.3000 | 0.2571 |
+| pupil | rf+none | within_participant | 0.3571 | 0.3001 |
+
+### Interpretation notes
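+In this run, no single modality dominated: the highest balanced accuracy came from pupil under `within_participant` (0.3571) and `group_holdout` (0.2260), and from EEG under `loso` (0.2484). The fused multimodal table ranked second or third under every protocol (0.2173 `group_holdout`, 0.2063 `loso`, 0.3000 `within_participant`), so these results do not by themselves support a fusion-first baseline. Fused and unimodal tracks should be reported side by side, with unimodal tracks retained for mechanistic interpretation, and all scores should be read against the chance level for the `all_bins` label set.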
+
@@ -0,0 +1,125 @@
+# Model and Modality Classification Output Summary
+
+## Inputs
+- Results JSON: `analysis_pipeline\reports\ml_results_all_bins.json`
+
+## Run Snapshot
+- Scenario: `all_bins`
+- Datasets: `eeg, ecg, pupil, fused`
+- Protocols: `loso, group_holdout, within_participant`
+- Models: `logreg, knn, svm, gaussian_nb, decision_tree, mlp, rf`
+- Feature selectors: `none`
+- Evaluations: `392`
+- Aggregate rows: `84`
+
+## Best Pipeline by Modality and Protocol
+| dataset | protocol | best_model | best_feature_selector | best_pipeline | balanced_accuracy_mean | macro_f1_mean | n_evaluations |
+|---|---|---|---|---|---|---|---|
+| ecg | group_holdout | rf | none | rf+none | 0.1947 | 0.1863 | 2 |
+| ecg | loso | rf | none | rf+none | 0.2014 | 0.1801 | 2 |
+| ecg | within_participant | rf | none | rf+none | 0.2143 | 0.1803 | 10 |
+| eeg | group_holdout | knn | none | knn+none | 0.1914 | 0.1823 | 2 |
+| eeg | loso | mlp | none | mlp+none | 0.2484 | 0.1977 | 2 |
+| eeg | within_participant | mlp | none | mlp+none | 0.3071 | 0.2774 | 10 |
+| fused | group_holdout | rf | none | rf+none | 0.2173 | 0.1907 | 2 |
+| fused | loso | rf | none | rf+none | 0.2063 | 0.1295 | 2 |
+| fused | within_participant | logreg | none | logreg+none | 0.3000 | 0.2571 | 10 |
+| pupil | group_holdout | decision_tree | none | decision_tree+none | 0.2260 | 0.2154 | 2 |
+| pupil | loso | decision_tree | none | decision_tree+none | 0.2004 | 0.1824 | 2 |
+| pupil | within_participant | rf | none | rf+none | 0.3571 | 0.3001 | 10 |
+
+## Protocol-Level Summary by Modality
+| dataset | protocol | n_models | n_pipelines | mean_balanced_accuracy | max_balanced_accuracy | mean_macro_f1 |
+|---|---|---|---|---|---|---|
+| ecg | group_holdout | 7 | 7 | 0.1616 | 0.1947 | 0.1474 |
+| ecg | loso | 7 | 7 | 0.1655 | 0.2014 | 0.1252 |
+| ecg | within_participant | 7 | 7 | 0.1929 | 0.2143 | 0.1519 |
+| eeg | group_holdout | 7 | 7 | 0.1682 | 0.1914 | 0.1458 |
+| eeg | loso | 7 | 7 | 0.1928 | 0.2484 | 0.1313 |
+| eeg | within_participant | 7 | 7 | 0.2684 | 0.3071 | 0.2333 |
+| fused | group_holdout | 7 | 7 | 0.1687 | 0.2173 | 0.1349 |
+| fused | loso | 7 | 7 | 0.1556 | 0.2063 | 0.1100 |
+| fused | within_participant | 7 | 7 | 0.2173 | 0.3000 | 0.1713 |
+| pupil | group_holdout | 7 | 7 | 0.1753 | 0.2260 | 0.1336 |
+| pupil | loso | 7 | 7 | 0.1601 | 0.2004 | 0.0920 |
+| pupil | within_participant | 7 | 7 | 0.2796 | 0.3571 | 0.2148 |
+
+## Model Performance by Modality
+| dataset | model | n_protocols | n_pipelines | mean_balanced_accuracy | max_balanced_accuracy | mean_macro_f1 | top_pipeline | top_pipeline_selector | top_pipeline_protocol |
+|---|---|---|---|---|---|---|---|---|---|
+| ecg | rf | 3 | 1 | 0.2035 | 0.2143 | 0.1822 | rf+none | none | within_participant |
+| ecg | knn | 3 | 1 | 0.1967 | 0.2143 | 0.1741 | knn+none | none | within_participant |
+| ecg | gaussian_nb | 3 | 1 | 0.1730 | 0.2143 | 0.1249 | gaussian_nb+none | none | within_participant |
+| ecg | svm | 3 | 1 | 0.1681 | 0.1929 | 0.1278 | svm+none | none | within_participant |
+| ecg | logreg | 3 | 1 | 0.1675 | 0.2000 | 0.1303 | logreg+none | none | within_participant |
+| ecg | mlp | 3 | 1 | 0.1625 | 0.1675 | 0.1344 | mlp+none | none | loso |
+| ecg | decision_tree | 3 | 1 | 0.1420 | 0.1500 | 0.1167 | decision_tree+none | none | within_participant |
+| eeg | mlp | 3 | 1 | 0.2457 | 0.3071 | 0.2142 | mlp+none | none | within_participant |
+| eeg | rf | 3 | 1 | 0.2163 | 0.2786 | 0.1614 | rf+none | none | within_participant |
+| eeg | svm | 3 | 1 | 0.2158 | 0.2857 | 0.1781 | svm+none | none | within_participant |
+| eeg | knn | 3 | 1 | 0.2110 | 0.2571 | 0.1805 | knn+none | none | within_participant |
+| eeg | gaussian_nb | 3 | 1 | 0.2025 | 0.2500 | 0.1458 | gaussian_nb+none | none | within_participant |
+| eeg | logreg | 3 | 1 | 0.1973 | 0.2714 | 0.1544 | logreg+none | none | within_participant |
+| eeg | decision_tree | 3 | 1 | 0.1801 | 0.2286 | 0.1567 | decision_tree+none | none | within_participant |
+| fused | rf | 3 | 1 | 0.2103 | 0.2173 | 0.1661 | rf+none | none | group_holdout |
+| fused | decision_tree | 3 | 1 | 0.1946 | 0.2571 | 0.1813 | decision_tree+none | none | within_participant |
+| fused | mlp | 3 | 1 | 0.1845 | 0.2071 | 0.1618 | mlp+none | none | within_participant |
+| fused | logreg | 3 | 1 | 0.1824 | 0.3000 | 0.1166 | logreg+none | none | within_participant |
+| fused | svm | 3 | 1 | 0.1770 | 0.2143 | 0.1357 | svm+none | none | within_participant |
+| fused | knn | 3 | 1 | 0.1675 | 0.1774 | 0.1552 | knn+none | none | group_holdout |
+| fused | gaussian_nb | 3 | 1 | 0.1478 | 0.1714 | 0.0542 | gaussian_nb+none | none | within_participant |
+| pupil | decision_tree | 3 | 1 | 0.2445 | 0.3071 | 0.2198 | decision_tree+none | none | within_participant |
+| pupil | rf | 3 | 1 | 0.2385 | 0.3571 | 0.1845 | rf+none | none | within_participant |
+| pupil | mlp | 3 | 1 | 0.2324 | 0.3071 | 0.1656 | mlp+none | none | within_participant |
+| pupil | svm | 3 | 1 | 0.2306 | 0.3214 | 0.1611 | svm+none | none | within_participant |
+| pupil | knn | 3 | 1 | 0.1991 | 0.2929 | 0.1632 | knn+none | none | within_participant |
+| pupil | gaussian_nb | 3 | 1 | 0.1502 | 0.1786 | 0.0550 | gaussian_nb+none | none | within_participant |
+| pupil | logreg | 3 | 1 | 0.1398 | 0.1929 | 0.0783 | logreg+none | none | within_participant |
+
+## Top Pipelines per Modality and Protocol
+| dataset | protocol | rank | model | feature_selector | pipeline_id | balanced_accuracy_mean | macro_f1_mean |
+|---|---|---|---|---|---|---|---|
+| ecg | group_holdout | 1 | rf | none | rf+none | 0.1947 | 0.1863 |
+| ecg | group_holdout | 2 | svm | none | svm+none | 0.1805 | 0.1621 |
+| ecg | group_holdout | 3 | knn | none | knn+none | 0.1790 | 0.1740 |
+| ecg | loso | 1 | rf | none | rf+none | 0.2014 | 0.1801 |
+| ecg | loso | 2 | knn | none | knn+none | 0.1969 | 0.1734 |
+| ecg | loso | 3 | logreg | none | logreg+none | 0.1752 | 0.1180 |
+| ecg | within_participant | 1 | rf | none | rf+none | 0.2143 | 0.1803 |
+| ecg | within_participant | 2 | knn | none | knn+none | 0.2143 | 0.1749 |
+| ecg | within_participant | 3 | gaussian_nb | none | gaussian_nb+none | 0.2143 | 0.1564 |
+| eeg | group_holdout | 1 | knn | none | knn+none | 0.1914 | 0.1823 |
+| eeg | group_holdout | 2 | mlp | none | mlp+none | 0.1815 | 0.1674 |
+| eeg | group_holdout | 3 | svm | none | svm+none | 0.1768 | 0.1455 |
+| eeg | loso | 1 | mlp | none | mlp+none | 0.2484 | 0.1977 |
+| eeg | loso | 2 | rf | none | rf+none | 0.2143 | 0.1041 |
+| eeg | loso | 3 | gaussian_nb | none | gaussian_nb+none | 0.1983 | 0.1118 |
+| eeg | within_participant | 1 | mlp | none | mlp+none | 0.3071 | 0.2774 |
+| eeg | within_participant | 2 | svm | none | svm+none | 0.2857 | 0.2500 |
+| eeg | within_participant | 3 | rf | none | rf+none | 0.2786 | 0.2429 |
+| fused | group_holdout | 1 | rf | none | rf+none | 0.2173 | 0.1907 |
+| fused | group_holdout | 2 | knn | none | knn+none | 0.1774 | 0.1727 |
+| fused | group_holdout | 3 | svm | none | svm+none | 0.1738 | 0.1528 |
+| fused | loso | 1 | rf | none | rf+none | 0.2063 | 0.1295 |
+| fused | loso | 2 | mlp | none | mlp+none | 0.1746 | 0.1539 |
+| fused | loso | 3 | knn | none | knn+none | 0.1607 | 0.1504 |
+| fused | within_participant | 1 | logreg | none | logreg+none | 0.3000 | 0.2571 |
+| fused | within_participant | 2 | decision_tree | none | decision_tree+none | 0.2571 | 0.2369 |
+| fused | within_participant | 3 | svm | none | svm+none | 0.2143 | 0.1299 |
+| pupil | group_holdout | 1 | decision_tree | none | decision_tree+none | 0.2260 | 0.2154 |
+| pupil | group_holdout | 2 | mlp | none | mlp+none | 0.2075 | 0.1444 |
+| pupil | group_holdout | 3 | svm | none | svm+none | 0.1957 | 0.1578 |
+| pupil | loso | 1 | decision_tree | none | decision_tree+none | 0.2004 | 0.1824 |
+| pupil | loso | 2 | mlp | none | mlp+none | 0.1825 | 0.1125 |
+| pupil | loso | 3 | svm | none | svm+none | 0.1746 | 0.0938 |
+| pupil | within_participant | 1 | rf | none | rf+none | 0.3571 | 0.3001 |
+| pupil | within_participant | 2 | svm | none | svm+none | 0.3214 | 0.2317 |
+| pupil | within_participant | 3 | mlp | none | mlp+none | 0.3071 | 0.2399 |
+
+## Exported Tables
+- All pipelines: `docs\tables\all_bins_publication\all_pipeline_aggregates.csv`
+- Best by track: `docs\tables\all_bins_publication\best_pipeline_by_modality_protocol.csv`
+- Protocol summary: `docs\tables\all_bins_publication\protocol_summary_by_modality.csv`
+- Model-by-modality summary: `docs\tables\all_bins_publication\model_summary_by_modality.csv`
+- Top pipelines: `docs\tables\all_bins_publication\top_pipelines_by_track.csv`
+