-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy pathmlp_correlation.py
More file actions
97 lines (80 loc) · 3.32 KB
/
mlp_correlation.py
File metadata and controls
97 lines (80 loc) · 3.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split
import matplotlib.pyplot as plt
class MLP(nn.Module):
def __init__(self, input_dim, hidden_dim, output_dim):
super().__init__()
self.net = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, output_dim)
)
def forward(self, x):
return self.net(x)
def batch_mlp_corr(actions, num_epochs=10, batch_size=2048, val_ratio=0.2):
# actions with shape (num_envs, sequence, action_dim)
actions = torch.from_numpy(actions).float().cuda()
N, S, dim = actions.shape
model = MLP(dim, 512, dim).to(actions.device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()
x = actions[:, :-1, :] # Shape: (N, S-1, dim)
y = actions[:, 1:, :] # Shape: (N, S-1, dim)
# Reshape to combine batch and sequence dimensions
x = x.reshape(-1, dim) # Shape: ((N*(S-1)), dim)
y = y.reshape(-1, dim) # Shape: ((N*(S-1)), dim)
# Split x, y into train/val datasets
dataset = TensorDataset(x, y)
num_val = int(len(dataset) * val_ratio)
num_train = len(dataset) - num_val
train_dataset, val_dataset = random_split(dataset, [num_train, num_val])
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
# Track losses
train_losses = []
val_losses = []
for epoch in range(num_epochs):
# Training loop
model.train()
total_train_loss = 0
num_train_batches = 0
for xb, yb in train_loader:
xb, yb = xb.to(actions.device), yb.to(actions.device)
optimizer.zero_grad()
preds = model(xb)
loss = criterion(preds, yb)
loss.backward()
optimizer.step()
total_train_loss += loss.item()
num_train_batches += 1
avg_train_loss = total_train_loss / num_train_batches if num_train_batches > 0 else float("inf")
train_losses.append(avg_train_loss)
# Validation loop
model.eval()
total_val_loss = 0
num_val_batches = 0
with torch.no_grad():
for xb, yb in val_loader:
xb, yb = xb.to(actions.device), yb.to(actions.device)
preds = model(xb)
loss = criterion(preds, yb)
total_val_loss += loss.item()
num_val_batches += 1
avg_val_loss = total_val_loss / num_val_batches if num_val_batches > 0 else float("inf")
val_losses.append(avg_val_loss)
print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {avg_train_loss:.4f} - Val Loss: {avg_val_loss:.4f}")
# # Plot training and validation loss
# plt.figure(figsize=(8, 5))
# plt.plot(range(1, num_epochs + 1), train_losses, label="Train Loss", marker="o")
# plt.plot(range(1, num_epochs + 1), val_losses, label="Validation Loss", marker="s")
# plt.xlabel("Epochs")
# plt.ylabel("Loss")
# plt.title("Training and Validation Loss Curve")
# plt.legend()
# plt.grid()
# plt.savefig("loss.png")
# plt.show()
# Return the average of the 5 lowest validation scores
return sorted(val_losses)[0]