-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrain.py
More file actions
143 lines (114 loc) · 4.92 KB
/
train.py
File metadata and controls
143 lines (114 loc) · 4.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import argparse
import torch
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from dataset import PetNoseDataset
import time
from model import NoseNet
import torch.nn as nn
# Image preprocessing pipeline: convert the raw image to PIL, resize it to a
# fixed 224x224 input, convert it to a tensor, then normalise each channel
# with the per-channel mean / std listed below.
_resize_to = (224, 224)
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(_resize_to),
    transforms.ToTensor(),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
])
def rescale_keypoints(keypoints, original_size, resized_size=(224, 224)):
    """Map (x, y) keypoints from resized-image coordinates back to the original image.

    The x coordinate is multiplied by ``original_width / resized_width`` and the
    y coordinate by ``original_height / resized_height``.

    Args:
        keypoints: A sequence or tensor whose last dimension starts with
            ``(x, y)``. A single point of shape ``(2,)`` and a batch of shape
            ``(N, 2)`` are both accepted.
        original_size: ``(width, height)`` of the original image.
        resized_size: ``(width, height)`` the image was resized to before being
            fed to the model. Defaults to ``(224, 224)``.

    Returns:
        A tensor with the rescaled ``(x, y)`` values. It keeps the dtype and
        device of a floating-point tensor input and stays attached to the
        autograd graph; non-floating input is converted to the default float
        dtype first.
    """
    original_width, original_height = original_size
    resized_width, resized_height = resized_size

    points = torch.as_tensor(keypoints)
    if not points.is_floating_point():
        # Integer coordinates would truncate the scaled result.
        points = points.to(torch.get_default_dtype())

    # Build the scale on the same device / dtype as the points so the multiply
    # never forces a host round-trip (torch.tensor([...]) of tensor elements
    # would copy to the CPU and drop gradients).
    scale = torch.tensor(
        [original_width / resized_width, original_height / resized_height],
        dtype=points.dtype,
        device=points.device,
    )
    # Only the first two entries (x, y) of the last dimension are rescaled.
    return points[..., :2] * scale
def train(model, train_loader, test_loader, criterion, optimizer, scheduler, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs, then save its weights and a loss plot.

    Every epoch makes one pass over ``train_loader`` with gradient-norm
    clipping (max norm 1.0), steps ``scheduler`` once, and prints the epoch
    duration and the average training loss per sample. After the final epoch
    the model's state dict is written to ``model.pth`` and the training-loss
    curve is saved to ``loss.png`` and displayed.

    Args:
        model: Network to optimise. Batches are moved to the device its
            parameters live on.
        train_loader: Iterable of ``(images, keypoints, _)`` batches; the
            third element is ignored here.
        test_loader: Accepted for the validation phase, which is currently
            disabled (see the NOTE in the body), so it is not read.
        criterion: Loss callable invoked as ``criterion(outputs, keypoints)``.
        optimizer: Optimiser that updates ``model``'s parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        num_epochs: Number of epochs to run.
    """
    # Derive the device from the model instead of relying on a module-level
    # ``device`` global, which only exists when this file is run as a script.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # A criterion with reduction='sum' already returns a per-batch total. Any
    # other setting is treated as a per-batch mean and must be weighted by the
    # batch size before dividing by the number of samples, otherwise the
    # reported average is too small by roughly a factor of the batch size.
    loss_is_batch_mean = getattr(criterion, 'reduction', 'mean') != 'sum'

    epoch_losses = []

    for epoch in range(num_epochs):
        epoch_start_time = time.time()

        # Training phase
        model.train()
        train_loss = 0.0
        num_train_samples = 0

        for images, keypoints, _ in train_loader:
            images = images.to(device)
            keypoints = keypoints.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, keypoints)
            loss.backward()

            # Clip the global gradient norm to keep updates bounded.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            batch_size = images.size(0)
            batch_total = loss.item()
            if loss_is_batch_mean:
                batch_total *= batch_size
            train_loss += batch_total
            num_train_samples += batch_size

        # An empty loader yields NaN rather than a ZeroDivisionError.
        if num_train_samples:
            avg_train_loss = train_loss / num_train_samples
        else:
            avg_train_loss = float('nan')
        epoch_losses.append(avg_train_loss)

        # NOTE: the validation phase is disabled. The previous implementation
        # rescaled predictions to the original image size before computing the
        # loss against the true keypoints, and that resizing produced bizarre
        # validation losses. ``test_loader`` is kept in the signature so the
        # phase can be restored without changing callers.

        scheduler.step()

        epoch_duration = time.time() - epoch_start_time
        print(f'Epoch [{epoch + 1}/{num_epochs}], Duration: {epoch_duration:.2f} sec, Training Loss: {avg_train_loss:.4f}')

    # Save the model
    torch.save(model.state_dict(), 'model.pth')

    # Plot the training loss (no validation curve while validation is disabled).
    plt.plot(epoch_losses, label='Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Loss per Epoch')
    plt.savefig('loss.png')
    plt.show()
if __name__ == "__main__":
    # Command-line options: -b sets the batch size, -e the number of epochs.
    cli = argparse.ArgumentParser(description='PetNet')
    cli.add_argument('-b', type=int, default=16, help='batch size')
    cli.add_argument('-e', type=int, default=20, help='number of epochs')
    args = cli.parse_args()

    # Use CUDA when it is available, otherwise fall back to the CPU.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print(f'Using device {device}')

    # Both splits read images from the same directory; only the label files differ.
    train_image_dir = test_image_dir = './data/images-original/images/'
    train_labels_file, test_labels_file = 'train_noses.3.txt', 'test_noses.txt'

    # Training batches are shuffled; the test loader yields single samples in order.
    train_dataset = PetNoseDataset(train_image_dir, train_labels_file, transform)
    train_loader = DataLoader(train_dataset, shuffle=True, batch_size=args.b)
    test_dataset = PetNoseDataset(test_image_dir, test_labels_file, transform)
    test_loader = DataLoader(test_dataset, shuffle=False, batch_size=1)

    model = NoseNet()
    model = model.to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), weight_decay=1e-5, lr=1e-4)
    # StepLR multiplies the learning rate by gamma every step_size scheduler steps.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, gamma=0.8, step_size=15)

    # test_loader is handed over for the validation phase, which is currently
    # disabled inside train().
    epochs = args.e
    train(model, train_loader, test_loader, criterion, optimizer, scheduler, epochs)