-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsoftmax.py
More file actions
61 lines (43 loc) · 1.46 KB
/
softmax.py
File metadata and controls
61 lines (43 loc) · 1.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from builtins import range
from matplotlib import axis
import numpy as np
from random import shuffle
def softmax_loss_naive(W, X, y, reg):
    """Softmax loss and gradient, computed with explicit Python loops.

    Parameters
    ----------
    W : ndarray of shape (D, C)
        Weight matrix (D features, C classes).
    X : ndarray of shape (N, D)
        Minibatch of N data points.
    y : ndarray of shape (N,)
        Integer class labels, 0 <= y[i] < C.
    reg : float
        L2 regularization strength.

    Returns
    -------
    loss : float
        Mean cross-entropy loss plus ``reg * sum(W * W)``.
    dW : ndarray of shape (D, C)
        Gradient of the loss with respect to W.
    """
    loss = 0.0
    dW = np.zeros_like(W)
    num_classes = W.shape[1]
    num_train = X.shape[0]

    for i in range(num_train):
        scores = X[i].dot(W)
        # Numerical stability: subtract the max score so exp() cannot overflow.
        scores -= np.max(scores)
        p = np.exp(scores)
        p /= p.sum()
        # Only the correct-class probability enters the loss; avoid taking
        # the log of the whole vector.
        loss -= np.log(p[y[i]])
        for j in range(num_classes):
            # d(loss_i)/dW[:, j] = (p_j - 1[j == y_i]) * x_i
            if j == y[i]:
                dW[:, j] -= (1 - p[j]) * X[i]
            else:
                dW[:, j] += p[j] * X[i]

    # Average over the minibatch and add the L2 regularization terms.
    loss = loss / num_train + reg * np.sum(W * W)
    dW = dW / num_train + 2 * reg * W
    return loss, dW
def softmax_loss_vectorized(W, X, y, reg):
    """Softmax loss and gradient without explicit loops.

    Parameters
    ----------
    W : ndarray of shape (D, C)
        Weight matrix (D features, C classes).
    X : ndarray of shape (N, D)
        Minibatch of N data points.
    y : ndarray of shape (N,)
        Integer class labels, 0 <= y[i] < C.
    reg : float
        L2 regularization strength.

    Returns
    -------
    loss : float
        Mean cross-entropy loss plus ``reg * sum(W * W)``.
    dW : ndarray of shape (D, C)
        Gradient of the loss with respect to W.
    """
    loss = 0.0
    dW = np.zeros_like(W)
    N = X.shape[0]
    rows = np.arange(N)

    # Class scores for the whole batch, shifted per-row for numerical
    # stability (exp of a large score would overflow).
    scores = X.dot(W)
    scores -= scores.max(axis=1, keepdims=True)

    # Row-normalized softmax probabilities.
    exp_scores = np.exp(scores)
    probs = exp_scores / exp_scores.sum(axis=1, keepdims=True)

    # Cross-entropy of the correct classes, averaged, plus L2 penalty.
    correct_logprobs = -np.log(probs[rows, y])
    loss = correct_logprobs.mean() + reg * np.sum(W * W)

    # Gradient of the scores is (probs - one_hot(y)); backprop through the
    # linear layer and add the regularization gradient.
    dscores = probs
    dscores[rows, y] -= 1
    dW = X.T.dot(dscores) / N + 2 * reg * W
    return loss, dW