class_name Network
extends RefCounted
# Written by Variable-ind (https://github.com/Variable-ind)
# Heavily inspired by https://github.com/mnielsen/neural-networks-and-deep-learning/blob/master/src/network.py

## Emitted by feedforward() each time a layer's activation matrix is computed.
signal activation_changed(layer_idx: int, activations: Matrix)

## Number of layers in the network; always derived from [member layer_sizes].
var num_layers: int:
	get:
		return layer_sizes.size()

## One weight matrix per non-input layer, shaped
## (neurons in that layer) x (neurons in the previous layer).
var weights: Array[Matrix]
## One bias column matrix per non-input layer, shaped (neurons in that layer) x 1.
var biases: Array[Matrix]

## Neuron count of every layer; index 0 is the input layer.
var layer_sizes := PackedInt32Array()
var _visualizer: Node
1417
1518
## Creates a network whose layer widths are given by [param _sizes].
## When both [param copy_weights] and [param copy_biases] are supplied they are
## duplicated and used as-is; otherwise fresh weights and biases are created
## for every layer after the input layer.
func _init(
	_sizes: PackedInt32Array,
	copy_weights: Array[Matrix] = [],
	copy_biases: Array[Matrix] = []
) -> void:
	randomize()
	layer_sizes = _sizes

	# Only reuse existing parameters when BOTH sets were provided; checking
	# the weights alone would leave the network with an empty bias list.
	if not copy_weights.is_empty() and not copy_biases.is_empty():
		biases = copy_biases.duplicate(true)
		weights = copy_weights.duplicate(true)
		return

	# Make a fresh random copy
	# NOTE: 0th layer will have no bias/weights so we start with 1
	for layer in range(1, layer_sizes.size()):
		# number of rows (no of neurons in current layer)
		var row_size = layer_sizes[layer]
		# number of columns (no of neurons in previous layer)
		var column_size = layer_sizes[layer - 1]
		# NOTE(review): the trailing `true` presumably asks Matrix for random
		# initial values - confirm against Matrix._init.
		biases.append(Matrix.new(row_size, 1, true))
		weights.append(Matrix.new(row_size, column_size, true))
3541
3642
37- func feedforward (inputs : Array [float ], activation_func : = Activation . SIGMOID ) -> Matrix :
38- assert (inputs .size () == sizes [0 ], "Inputs are not equal to first layer nodes" )
43+ func feedforward (inputs : Array [float ]) -> Matrix :
44+ assert (inputs .size () == layer_sizes [0 ], "Inputs are not equal to first layer nodes" )
3945
4046 ## Feeding our inputs to the activation matrix
4147 var activation_matrix = Matrix .new (inputs .size (), 1 )
@@ -51,20 +57,130 @@ func feedforward(inputs: Array[float], activation_func := Activation.SIGMOID) ->
5157 var weight = weights [layer ] # Next layer's weight for this layer.
5258 ## Find the activation matrix for the next layer
5359 ## N+1 = Sigmoid of {(Weight).(N) + bias}
54- match activation_func :
55- Activation .SIGMOID :
56- activation_matrix = weight .product_matrix (activation_matrix ).add (bias ).sigmoid ()
57- Activation .RELU :
58- activation_matrix = weight .product_matrix (activation_matrix ).add (bias ).relu ()
60+ activation_matrix = weight .product_matrix (activation_matrix ).add (bias ).sigmoid ()
5961 emit_signal ("activation_changed" , layer + 1 , activation_matrix )
6062
6163 ## Now the activation array consist of output activation
6264 return activation_matrix
6365
6466
## Train the neural network using mini-batch stochastic gradient descent.
## [param training_data] is an array of [code][x, y][/code] pairs holding the
## training inputs and the desired outputs; it is shuffled in place at the
## start of every epoch. [param eta] is the learning rate.
## If [param test_data] is provided then the network will be evaluated against
## it after each epoch and partial progress printed out. This is useful for
## tracking progress, but slows things down substantially.
func SGD(
	training_data: Array[Array],
	epochs: int,
	mini_batch_size: int,
	eta: float,
	test_data: Array[Array] = []
):
	var n = len(training_data)
	for j in range(epochs):
		training_data.shuffle()
		# Walk the shuffled data in consecutive slices of mini_batch_size;
		# the last slice may be shorter.
		for k in range(0, n, mini_batch_size):
			var mini_batch: Array[Array] = training_data.slice(k, k + mini_batch_size)
			update_mini_batch(mini_batch, eta)
		if test_data:
			# "%s" (not "&s") is the placeholder understood by the % operator.
			print("Epoch %s: %s / %s" % [str(j), str(evaluate(test_data)), str(len(test_data))])
		else:
			print("Epoch %s complete" % str(j))
92+
93+
## Update the network's weights and biases by applying gradient descent using
## backpropagation to a single mini batch.
## [param mini_batch] is an array of [code][x, y][/code] pairs and
## [param eta] is the learning rate. An empty batch leaves the network unchanged.
func update_mini_batch(mini_batch: Array[Array], eta: float) -> void:
	# Nothing to learn from, and eta / 0 below would be meaningless.
	if mini_batch.is_empty():
		return

	# Per-layer gradient accumulators with the same shapes as biases/weights.
	var total_nabla_b: Array[Matrix]
	var total_nabla_w: Array[Matrix]
	for b: Matrix in biases:
		total_nabla_b.append(Matrix.new(b.no_of_rows, b.no_of_columns))
	for w: Matrix in weights:
		total_nabla_w.append(Matrix.new(w.no_of_rows, w.no_of_columns))

	for sample: Array in mini_batch:
		## error_array contains [nabla_b, nabla_w]
		var error_array = backprop(sample[0], sample[1])
		var delta_nabla_b: Array[Matrix] = error_array[0]
		var delta_nabla_w: Array[Matrix] = error_array[1]

		# Accumulate layer by layer: layer i's total only receives layer i's delta.
		for i: int in total_nabla_b.size():
			total_nabla_b[i] = total_nabla_b[i].add(delta_nabla_b[i])
		for i: int in total_nabla_w.size():
			total_nabla_w[i] = total_nabla_w[i].add(delta_nabla_w[i])

	# update weights and biases accordingly, using the batch-averaged step
	var step: float = eta / len(mini_batch)
	# NOTE(review): assumes a.subtract_from(b) yields b - a, matching its use
	# in cost_derivative() - confirm against Matrix.
	for i: int in weights.size():
		weights[i] = total_nabla_w[i].multiply_scalar(step).subtract_from(weights[i])
	for i: int in biases.size():
		biases[i] = total_nabla_b[i].multiply_scalar(step).subtract_from(biases[i])
127+
128+
## Return an Array [nabla_b, nabla_w] representing the gradient for the cost function C_x.
## nabla_b and nabla_w are layer-by-layer matrices, similar to biases and weights.
## [param x] is the input activation matrix and [param y] the desired output.
func backprop(x: Matrix, y: Matrix):
	var nabla_b_array: Array[Matrix]
	var nabla_w_array: Array[Matrix]
	for b: Matrix in biases:
		nabla_b_array.append(Matrix.new(b.no_of_rows, b.no_of_columns))
	for w: Matrix in weights:
		nabla_w_array.append(Matrix.new(w.no_of_rows, w.no_of_columns))

	# feedforward
	var activation: Matrix = x
	var activations: Array[Matrix] = [x]  # all the activations, layer by layer
	var zs: Array[Matrix] = []  # all the z vectors, layer by layer
	# Each layer pairs its own weight matrix with its own bias, so walk both
	# arrays with a single shared index rather than nesting the loops.
	for i: int in weights.size():
		var z := weights[i].product_matrix(activation).add(biases[i])
		zs.append(z)
		activation = z.sigmoid()
		activations.append(activation)

	# backward pass
	# NOTE(review): clone(true) is presumably a transposed copy, mirroring the
	# .transpose() calls of the reference implementation - confirm against Matrix.
	var delta := cost_derivative(activations[-1], y).multiply_corresponding(zs[-1].sigmoid_prime())
	nabla_b_array[-1] = delta
	nabla_w_array[-1] = delta.product_matrix(activations[-2].clone(true))
	# l counts layers from the end: l = 2 is the second-to-last layer, and so on.
	for l in range(2, num_layers):
		var z := zs[-l]
		var sp = z.sigmoid_prime()
		delta = weights[-l + 1].clone(true).product_matrix(delta).multiply_corresponding(sp)
		nabla_b_array[-l] = delta
		nabla_w_array[-l] = delta.product_matrix(activations[-l - 1].clone(true))
	return [nabla_b_array, nabla_w_array]
161+
162+
## Return the number of test inputs for which the neural network outputs the correct result.
## Each entry of [param test_data] is an [code][input, expected][/code] pair.
## Note that the neural network's output is assumed to be the index of whichever neuron in the
## final layer has the highest activation.
func evaluate(test_data: Array[Array]) -> int:
	var correct := 0
	for sample: Array in test_data:
		# Compare the network's answer (argmax of the output layer) with the label.
		var predicted = feedforward(sample[0]).argmax()
		if predicted == sample[1]:
			correct += 1
	# The count must be returned: SGD() prints it in its progress line.
	return correct
174+
## Gradient of the cost with respect to the output activations
## (partial C_x / partial a), computed from the actual output
## [param output_activations] and the desired output [param y].
func cost_derivative(output_activations: Matrix, y: Matrix) -> Matrix:
	# NOTE(review): presumably evaluates to output_activations - y; confirm
	# the operand order of Matrix.subtract_from.
	var difference: Matrix = y.subtract_from(output_activations)
	return difference
179+
180+
## Returns a new Network built from this network's layer sizes, weights and
## biases; the constructor duplicates the weight and bias arrays it is given.
func clone() -> Network:
	# `layer_sizes` is the current member name; the old `sizes` no longer exists.
	return Network.new(layer_sizes, weights, biases)
69185
70186
0 commit comments