-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_cross_validation.py
More file actions
130 lines (102 loc) · 4.85 KB
/
run_cross_validation.py
File metadata and controls
130 lines (102 loc) · 4.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
"""
This script is responsible for K-fold (i.e., 5-fold and 10-fold) cross-validation for DualCrossTAP
"""
import numpy as np
import tensorflow as tf
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report
import os
from datetime import datetime
from config import *
from models.teacher import create_enhanced_hybrid_model
from models.utils import load_raw_data, ACTIVITY_NAMES
def run_cross_validation(k_folds=5):
    """Run K-fold cross-validation for the DualCrossTAP teacher model.

    Combines the train and test splits into one pool, then trains and
    evaluates a fresh ``create_enhanced_hybrid_model`` on each fold.
    Per-fold accuracy and a classification report are printed and written
    to a timestamped text file under ``cross_validation_results/``.

    Args:
        k_folds: Number of folds (e.g. 5 or 10).

    Returns:
        Tuple ``(mean_accuracy, std_accuracy)`` over all folds, as floats.
    """
    # Load all data
    print("Loading datasets for Cross-Validation...")
    X_train_raw, y_train = load_raw_data(os.path.join(DATA_DIR, X_TRAIN_PATH), os.path.join(DATA_DIR, Y_TRAIN_PATH))
    X_test_raw, y_test = load_raw_data(os.path.join(DATA_DIR, X_TEST_PATH), os.path.join(DATA_DIR, Y_TEST_PATH))

    # Combine train + test so CV folds are drawn from the full dataset
    # (matches the original paper's methodology).
    X_combined = np.concatenate([X_train_raw, X_test_raw], axis=0)
    y_combined = np.concatenate([y_train, y_test], axis=0)

    # Pre-slice modalities once to save memory during the fold loop.
    # Assumes the last axis packs 16 pressure channels followed by 9 IMU
    # channels (e.g. (N, 300, 25)) — TODO confirm against load_raw_data.
    # Each modality is flattened to (N, features) for the dense model.
    X_pressure = X_combined[:, :, :16].reshape(X_combined.shape[0], -1)
    X_imu = X_combined[:, :, 16:].reshape(X_combined.shape[0], -1)

    print(f"Total samples for CV: {len(y_combined)}")
    print(f"IMU Shape: {X_imu.shape}, Pressure Shape: {X_pressure.shape}")

    # Initialize K-Fold with a fixed seed for reproducible splits.
    kf = KFold(n_splits=k_folds, shuffle=True, random_state=RANDOM_SEED)

    results_dir = "cross_validation_results"
    os.makedirs(results_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    results_file = os.path.join(results_dir, f"{k_folds}_fold_cv_results_{timestamp}.txt")

    fold_accuracies = []
    with open(results_file, 'w') as f:
        f.write(f"Cross-Validation Results ({k_folds}-Fold) - {timestamp}\n")
        f.write("=" * 50 + "\n\n")

        # CV Loop — indices from kf.split(X_imu) apply to all three arrays
        # since they share the same first axis.
        for fold, (train_idx, val_idx) in enumerate(kf.split(X_imu), 1):
            print(f"\n--- Fold {fold}/{k_folds} ---")

            # Split data for this fold.
            X_tr_imu, X_val_imu = X_imu[train_idx], X_imu[val_idx]
            X_tr_press, X_val_press = X_pressure[train_idx], X_pressure[val_idx]
            y_tr, y_val = y_combined[train_idx], y_combined[val_idx]

            # Clear the Keras session so graphs from previous folds are
            # released and GPU memory does not accumulate across folds.
            tf.keras.backend.clear_session()

            # Create a fresh model per fold (imported from models/teacher.py).
            model = create_enhanced_hybrid_model(
                imu_shape=(X_tr_imu.shape[1],),
                pressure_shape=(X_tr_press.shape[1],),
                num_classes=NUM_CLASSES
            )
            model.compile(
                optimizer=tf.optimizers.AdamW(learning_rate=TEACHER_LEARNING_RATE, weight_decay=TEACHER_WEIGHT_DECAY),
                # Labels are integer class ids (np.argmax used below), hence sparse CE.
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy']
            )

            # Define callbacks.
            # NOTE(review): EarlyStopping patience is hard-coded to 10 while
            # ReduceLROnPlateau uses TEACHER_PATIENCE — confirm this asymmetry
            # is intentional (early stopping usually wants a longer patience).
            callbacks = [
                # CustomCallback(), # Optional: uncomment if you want batch-level logging
                tf.keras.callbacks.ReduceLROnPlateau(
                    monitor='val_loss', factor=TEACHER_LR_FACTOR, patience=TEACHER_PATIENCE, verbose=1
                ),
                tf.keras.callbacks.EarlyStopping(
                    monitor='val_loss', patience=10, restore_best_weights=True, verbose=1
                )
            ]

            # Train on this fold's training partition.
            model.fit(
                [X_tr_imu, X_tr_press], y_tr,
                batch_size=TEACHER_BATCH_SIZE,
                epochs=TEACHER_EPOCHS,
                validation_data=([X_val_imu, X_val_press], y_val),
                callbacks=callbacks,
                verbose=1
            )

            # Evaluate on the held-out fold.
            loss, accuracy = model.evaluate([X_val_imu, X_val_press], y_val, verbose=0)
            fold_accuracies.append(accuracy)
            print(f"Fold {fold} Accuracy: {accuracy:.4f}")

            # Predictions for the per-class classification report.
            y_pred_probs = model.predict([X_val_imu, X_val_press], verbose=0)
            y_pred = np.argmax(y_pred_probs, axis=1)

            # Log fold results to the report file.
            f.write(f"\n--- Fold {fold} ---\n")
            f.write(f"Accuracy: {accuracy:.5f}\n")
            f.write(classification_report(y_val, y_pred, target_names=ACTIVITY_NAMES, digits=4))

        # Final summary across folds. (Fixed typo: "Overal" -> "Overall".)
        mean_acc = np.mean(fold_accuracies)
        std_acc = np.std(fold_accuracies)
        summary = f"\n\nOverall Results:\nMean Accuracy: {mean_acc * 100:.2f}% (+/- {std_acc * 100:.2f}%)"
        print(summary)
        f.write(summary)

    print(f"\nFull report saved to: {results_file}")
    # Return the aggregate so programmatic callers can consume the result
    # (backward-compatible: the script path ignores the return value).
    return float(mean_acc), float(std_acc)
if __name__ == "__main__":
    # Enable on-demand GPU memory allocation so TensorFlow does not
    # reserve all VRAM up front; a no-op when no GPU is visible.
    for gpu in tf.config.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(gpu, True)

    # Set k_folds=10 here for 10-fold cross-validation.
    run_cross_validation(k_folds=5)