def loss(self, X, y=None):
    """Compute loss and gradient for the fully connected net.

    Inputs:
    - X: Array of input data of shape (N, d_1, ..., d_k)
    - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

    Returns:
    If y is None, then run a test-time forward pass of the model and return:
    - scores: Array of shape (N, C) giving classification scores, where
      scores[i, c] is the classification score for X[i] and class c.

    If y is not None, then run a training-time forward and backward pass and
    return a tuple of:
    - loss: Scalar value giving the loss
    - grads: Dictionary with the same keys as self.params, mapping parameter
      names to gradients of the loss with respect to those parameters.
    """
    X = X.astype(self.dtype)
    mode = "test" if y is None else "train"

    # Set train/test mode for batchnorm params and dropout param since they
    # behave differently during training and testing: batchnorm switches
    # between batch statistics (train) and stored running statistics (test),
    # and inverted dropout only masks/rescales activations during training.
    if self.use_dropout:
        self.dropout_param["mode"] = mode
    if self.normalization == "batchnorm":
        for bn_param in self.bn_params:
            bn_param["mode"] = mode
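    # A rough usage sketch of this method's two calling modes (assuming a
    # FullyConnectedNet instance named `model`; variable names here are
    # illustrative only):
    #
    #   scores = model.loss(X_batch)                 # test time: y omitted
    #   y_pred = np.argmax(scores, axis=1)           # predicted classes
    #
    #   loss, grads = model.loss(X_batch, y_batch)   # training time
    #   # an optimizer would then update, e.g.:
    #   # model.params['W1'] -= learning_rate * grads['W1']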
    scores = None
    ############################################################################
    # TODO: Implement the forward pass for the fully connected net, computing  #
    # the class scores for X and storing them in the scores variable.          #
    #                                                                          #
    # When using dropout, you'll need to pass self.dropout_param to each       #
    # dropout forward pass.                                                    #
    #                                                                          #
    # When using batch normalization, you'll need to pass self.bn_params[0] to #
    # the forward pass for the first batch normalization layer, pass           #
    # self.bn_params[1] to the forward pass for the second batch normalization #
    # layer, etc.                                                              #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
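    # Layer pattern implemented below, with norm and dropout applied only when
    # enabled (the final affine layer feeds the softmax loss directly):
    #
    #   {affine - [batch/layer norm] - ReLU - [dropout]} x (L - 1) - affine - softmax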
    # Per-layer outputs and caches, kept for the backward pass.
    aff_outs, aff_caches = [], []
    bn_outs, bn_caches = [], []
    relu_outs, relu_caches = [], []
    drop_outs, drop_caches = [], []

    # Hidden layers: affine -> [batch/layer norm] -> ReLU -> [dropout].
    for i in range(self.num_layers - 1):
        layer_in = X if i == 0 else drop_outs[-1]

        # Affine forward.
        aff_out, aff_cache = affine_forward(
            layer_in, self.params['W' + str(i + 1)], self.params['b' + str(i + 1)]
        )
        aff_outs.append(aff_out)
        aff_caches.append(aff_cache)

        # Normalization forward (every iteration of this loop is a hidden
        # layer, so each has its own gamma/beta and bn_param entry).
        if self.normalization == "batchnorm":
            bn_out, bn_cache = batchnorm_forward(
                aff_out,
                self.params['gamma' + str(i + 1)],
                self.params['beta' + str(i + 1)],
                self.bn_params[i],
            )
            bn_outs.append(bn_out)
            bn_caches.append(bn_cache)
        elif self.normalization == "layernorm":
            bn_out, bn_cache = layernorm_forward(
                aff_out,
                self.params['gamma' + str(i + 1)],
                self.params['beta' + str(i + 1)],
                self.bn_params[i],
            )
            bn_outs.append(bn_out)
            bn_caches.append(bn_cache)
        else:
            bn_out = aff_out

        # ReLU forward.
        relu_out, relu_cache = relu_forward(bn_out)
        relu_outs.append(relu_out)
        relu_caches.append(relu_cache)

        # Dropout forward (optional).
        if self.use_dropout:
            drop_out, drop_cache = dropout_forward(relu_out, self.dropout_param)
            drop_caches.append(drop_cache)
        else:
            drop_out = relu_out
        drop_outs.append(drop_out)

    # Final affine layer produces the class scores (no norm/ReLU/dropout).
    i = self.num_layers
    last_in = drop_outs[-1] if drop_outs else X  # fall back to X if there are no hidden layers
    aff_out, aff_cache = affine_forward(
        last_in, self.params['W' + str(i)], self.params['b' + str(i)]
    )
    aff_outs.append(aff_out)
    aff_caches.append(aff_cache)
    scores = aff_outs[-1]
    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                             END OF YOUR CODE                             #
    ############################################################################

    # If test mode return early.
    if mode == "test":
        return scores

    loss, grads = 0.0, {}
    ############################################################################
    # TODO: Implement the backward pass for the fully connected net. Store the #
    # loss in the loss variable and gradients in the grads dictionary. Compute #
    # data loss using softmax, and make sure that grads[k] holds the gradients #
    # for self.params[k]. Don't forget to add L2 regularization!               #
    #                                                                          #
    # When using batch/layer normalization, you don't need to regularize the   #
    # scale and shift parameters.                                              #
    #                                                                          #
    # NOTE: To ensure that your implementation matches ours and you pass the   #
    # automated tests, make sure that your L2 regularization includes a factor #
    # of 0.5 to simplify the expression for the gradient.                      #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
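    # For reference, the objective implemented below (softmax data loss plus
    # L2 weight decay with the 0.5 factor required by the note above):
    #
    #   loss = softmax_loss(scores, y) + 0.5 * reg * sum_l ||W_l||^2
    #   dW_l = dW_l(data loss, via backprop) + reg * W_l
    #
    # The 0.5 cancels the factor of 2 from differentiating ||W_l||^2, so the
    # regularization gradient is simply reg * W_l; the biases and gamma/beta
    # are not regularized.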
    # Softmax data loss and its gradient with respect to the scores.
    loss, grad = softmax_loss(scores, y)

    # L2 regularization on the weights only (not biases or gamma/beta).
    for i in range(self.num_layers):
        loss += 0.5 * self.reg * np.sum(np.square(self.params['W' + str(i + 1)]))

    # Backprop through the final affine layer; its weight gradient also needs
    # the regularization term.
    i = self.num_layers
    grad, grads['W' + str(i)], grads['b' + str(i)] = affine_backward(grad, aff_caches[i - 1])
    grads['W' + str(i)] += self.reg * self.params['W' + str(i)]

    # Backprop through the hidden layers in reverse order:
    # [dropout] -> ReLU -> [norm] -> affine.
    for i in range(self.num_layers - 1, 0, -1):
        if self.use_dropout:
            grad = dropout_backward(grad, drop_caches[i - 1])
        grad = relu_backward(grad, relu_caches[i - 1])
        if self.normalization == "batchnorm":
            grad, grads['gamma' + str(i)], grads['beta' + str(i)] = batchnorm_backward_alt(
                grad, bn_caches[i - 1]
            )
        elif self.normalization == "layernorm":
            grad, grads['gamma' + str(i)], grads['beta' + str(i)] = layernorm_backward(
                grad, bn_caches[i - 1]
            )
        grad, grads['W' + str(i)], grads['b' + str(i)] = affine_backward(grad, aff_caches[i - 1])
        grads['W' + str(i)] += self.reg * self.params['W' + str(i)]
    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                             END OF YOUR CODE                             #
    ############################################################################
    return loss, grads
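# A minimal gradient-check sketch for this method (a hypothetical snippet,
# assuming the usual cs231n assignment layout with eval_numerical_gradient
# available from cs231n.gradient_check; adjust constructor arguments to your
# version of the assignment):
#
#   import numpy as np
#   from cs231n.gradient_check import eval_numerical_gradient
#
#   N, D, C = 4, 15, 10
#   X = np.random.randn(N, D)
#   y = np.random.randint(C, size=N)
#   model = FullyConnectedNet([20, 30], input_dim=D, num_classes=C,
#                             reg=0.1, normalization="batchnorm",
#                             dtype=np.float64)
#   loss, grads = model.loss(X, y)
#   for name in sorted(grads):
#       f = lambda _: model.loss(X, y)[0]
#       num_grad = eval_numerical_gradient(f, model.params[name], verbose=False)
#       print(name, np.max(np.abs(num_grad - grads[name])))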