-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_cross_validation.py
More file actions
130 lines (102 loc) · 4.85 KB
/
run_cross_validation.py
File metadata and controls
130 lines (102 loc) · 4.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
"""
This script is responsible for K-fold (i.e., 5-fold and 10-fold) cross-validation for DualCrossTAP
"""
import numpy as np
import tensorflow as tf
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report
import os
from datetime import datetime
from config import *
from models.teacher import create_enhanced_hybrid_model
from models.utils import load_raw_data, ACTIVITY_NAMES
def run_cross_validation(k_folds=5):
    """Run K-fold cross-validation for the DualCrossTAP teacher model.

    Combines the train and test splits into one pool, then trains and
    evaluates a fresh ``create_enhanced_hybrid_model`` on each fold.
    Per-fold accuracy and a classification report are printed and written
    to a timestamped text file under ``cross_validation_results/``.

    Args:
        k_folds: Number of folds (e.g. 5 or 10).

    Returns:
        Tuple ``(mean_accuracy, std_accuracy)`` over all folds, as floats.
    """
    # Load all data
    print("Loading datasets for Cross-Validation...")
    X_train_raw, y_train = load_raw_data(os.path.join(DATA_DIR, X_TRAIN_PATH), os.path.join(DATA_DIR, Y_TRAIN_PATH))
    X_test_raw, y_test = load_raw_data(os.path.join(DATA_DIR, X_TEST_PATH), os.path.join(DATA_DIR, Y_TEST_PATH))

    # Combine train + test so CV folds are drawn from the full dataset
    # (matches the original paper's methodology).
    X_combined = np.concatenate([X_train_raw, X_test_raw], axis=0)
    y_combined = np.concatenate([y_train, y_test], axis=0)

    # Pre-slice modalities once to save memory during the fold loop.
    # Assumes the last axis packs 16 pressure channels followed by 9 IMU
    # channels (e.g. (N, 300, 25)) — TODO confirm against load_raw_data.
    # Each modality is flattened to (N, features) for the dense model.
    X_pressure = X_combined[:, :, :16].reshape(X_combined.shape[0], -1)
    X_imu = X_combined[:, :, 16:].reshape(X_combined.shape[0], -1)

    print(f"Total samples for CV: {len(y_combined)}")
    print(f"IMU Shape: {X_imu.shape}, Pressure Shape: {X_pressure.shape}")

    # Initialize K-Fold with a fixed seed for reproducible splits.
    kf = KFold(n_splits=k_folds, shuffle=True, random_state=RANDOM_SEED)

    results_dir = "cross_validation_results"
    os.makedirs(results_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    results_file = os.path.join(results_dir, f"{k_folds}_fold_cv_results_{timestamp}.txt")

    fold_accuracies = []
    with open(results_file, 'w') as f:
        f.write(f"Cross-Validation Results ({k_folds}-Fold) - {timestamp}\n")
        f.write("=" * 50 + "\n\n")

        # CV Loop — indices from kf.split(X_imu) apply to all three arrays
        # since they share the same first axis.
        for fold, (train_idx, val_idx) in enumerate(kf.split(X_imu), 1):
            print(f"\n--- Fold {fold}/{k_folds} ---")

            # Split data for this fold.
            X_tr_imu, X_val_imu = X_imu[train_idx], X_imu[val_idx]
            X_tr_press, X_val_press = X_pressure[train_idx], X_pressure[val_idx]
            y_tr, y_val = y_combined[train_idx], y_combined[val_idx]

            # Clear the Keras session so graphs from previous folds are
            # released and GPU memory does not accumulate across folds.
            tf.keras.backend.clear_session()

            # Create a fresh model per fold (imported from models/teacher.py).
            model = create_enhanced_hybrid_model(
                imu_shape=(X_tr_imu.shape[1],),
                pressure_shape=(X_tr_press.shape[1],),
                num_classes=NUM_CLASSES
            )
            model.compile(
                optimizer=tf.optimizers.AdamW(learning_rate=TEACHER_LEARNING_RATE, weight_decay=TEACHER_WEIGHT_DECAY),
                # Labels are integer class ids (np.argmax used below), hence sparse CE.
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy']
            )

            # Define callbacks.
            # NOTE(review): EarlyStopping patience is hard-coded to 10 while
            # ReduceLROnPlateau uses TEACHER_PATIENCE — confirm this asymmetry
            # is intentional (early stopping usually wants a longer patience).
            callbacks = [
                # CustomCallback(), # Optional: uncomment if you want batch-level logging
                tf.keras.callbacks.ReduceLROnPlateau(
                    monitor='val_loss', factor=TEACHER_LR_FACTOR, patience=TEACHER_PATIENCE, verbose=1
                ),
                tf.keras.callbacks.EarlyStopping(
                    monitor='val_loss', patience=10, restore_best_weights=True, verbose=1
                )
            ]

            # Train on this fold's training partition.
            model.fit(
                [X_tr_imu, X_tr_press], y_tr,
                batch_size=TEACHER_BATCH_SIZE,
                epochs=TEACHER_EPOCHS,
                validation_data=([X_val_imu, X_val_press], y_val),
                callbacks=callbacks,
                verbose=1
            )

            # Evaluate on the held-out fold.
            loss, accuracy = model.evaluate([X_val_imu, X_val_press], y_val, verbose=0)
            fold_accuracies.append(accuracy)
            print(f"Fold {fold} Accuracy: {accuracy:.4f}")

            # Predictions for the per-class classification report.
            y_pred_probs = model.predict([X_val_imu, X_val_press], verbose=0)
            y_pred = np.argmax(y_pred_probs, axis=1)

            # Log fold results to the report file.
            f.write(f"\n--- Fold {fold} ---\n")
            f.write(f"Accuracy: {accuracy:.5f}\n")
            f.write(classification_report(y_val, y_pred, target_names=ACTIVITY_NAMES, digits=4))

        # Final summary across folds. (Fixed typo: "Overal" -> "Overall".)
        mean_acc = np.mean(fold_accuracies)
        std_acc = np.std(fold_accuracies)
        summary = f"\n\nOverall Results:\nMean Accuracy: {mean_acc * 100:.2f}% (+/- {std_acc * 100:.2f}%)"
        print(summary)
        f.write(summary)

    print(f"\nFull report saved to: {results_file}")
    # Return the aggregate so programmatic callers can consume the result
    # (backward-compatible: the script path ignores the return value).
    return float(mean_acc), float(std_acc)
if __name__ == "__main__":
    # Enable on-demand GPU memory allocation so TensorFlow does not
    # reserve all VRAM up front; a no-op when no GPU is visible.
    for gpu in tf.config.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(gpu, True)

    # Set k_folds=10 here for 10-fold cross-validation.
    run_cross_validation(k_folds=5)