# ldp/mlp_correlation.py at main · long-context-dp/ldp · GitHub
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split
import matplotlib.pyplot as plt
class MLP(nn.Module):
    """Feed-forward network with one hidden layer: Linear -> ReLU -> Linear.

    Args:
        input_dim: Size of the last dimension of the input.
        hidden_dim: Width of the hidden layer.
        output_dim: Size of the last dimension of the output.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Layers are created in input-to-output order and stored under the
        # attribute ``net``, so parameters are named ``net.0.*`` and ``net.2.*``.
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the output."""
        out = self.net(x)
        return out

def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one pass over ``loader`` and return the mean of the per-batch losses.

    If ``optimizer`` is given, the model is put in train mode and updated after
    every batch. Otherwise it is put in eval mode and evaluated with gradients
    disabled. Returns ``float("inf")`` when ``loader`` yields no batches.
    """
    training = optimizer is not None
    model.train(training)
    total_loss = 0.0
    num_batches = 0

    with torch.set_grad_enabled(training):
        for xb, yb in loader:
            if training:
                optimizer.zero_grad()
            loss = criterion(model(xb), yb)
            if training:
                loss.backward()
                optimizer.step()
            total_loss += loss.item()
            num_batches += 1

    return total_loss / num_batches if num_batches > 0 else float("inf")


def batch_mlp_corr(actions, num_epochs=10, batch_size=2048, val_ratio=0.2):
    """Fit an MLP to predict each action from the previous one; return the best val loss.

    Consecutive pairs ``(actions[:, t], actions[:, t + 1])`` from every sequence
    are pooled, randomly split into train/validation subsets, and a
    one-hidden-layer ``MLP`` (hidden width 512) is trained with Adam (lr 1e-3)
    on an MSE objective. Per-epoch losses are printed to stdout.

    Args:
        actions: Array-like of shape ``(num_envs, sequence, action_dim)``;
            a NumPy array, a tensor, or anything ``torch.as_tensor`` accepts.
        num_epochs: Number of passes over the training subset.
        batch_size: Mini-batch size for both training and validation.
        val_ratio: Fraction of the pooled pairs held out for validation.

    Returns:
        The lowest per-epoch validation loss as a Python float, where each
        epoch's loss is the mean of its per-batch MSE values. ``float("inf")``
        if no epoch produced a validation loss.

    Raises:
        ValueError: If ``actions`` is not 3-dimensional.
    """
    # Use the GPU when one is present, but stay usable on CPU-only machines.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # detach() keeps an autograd-tracked tensor input out of the training graph.
    actions = torch.as_tensor(actions, dtype=torch.float32).detach().to(device)
    if actions.ndim != 3:
        raise ValueError(
            "actions must have shape (num_envs, sequence, action_dim), "
            f"got {tuple(actions.shape)}"
        )
    dim = actions.shape[-1]

    model = MLP(dim, 512, dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()

    # Inputs are steps 0..S-2 and targets are steps 1..S-1; flattening merges
    # the env and time axes into one sample axis of length N * (S - 1).
    x = actions[:, :-1, :].reshape(-1, dim)
    y = actions[:, 1:, :].reshape(-1, dim)

    dataset = TensorDataset(x, y)
    num_val = int(len(dataset) * val_ratio)
    num_train = len(dataset) - num_val
    train_dataset, val_dataset = random_split(dataset, [num_train, num_val])

    # The tensors already live on ``device``, so batches need no further transfer.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    val_losses = []
    for epoch in range(num_epochs):
        avg_train_loss = _run_epoch(model, train_loader, criterion, optimizer)
        avg_val_loss = _run_epoch(model, val_loader, criterion)
        val_losses.append(avg_val_loss)

        print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {avg_train_loss:.4f} - Val Loss: {avg_val_loss:.4f}")

    # Report the single best (lowest) validation loss seen across epochs.
    return min(val_losses, default=float("inf"))