2323#pragma once
2424
2525#include < cmath>
26+ #include < cstddef>
27+ #include < cstdlib>
2628
namespace tinymind {

// Forward declaration: definition lives in neuralnet.hpp.
template <std::size_t...> struct HiddenLayers;

namespace detail {

/**
 * XavierStages computes per-stage metrics for a network with NumberOfInputs
 * inputs, the given HiddenLayers<...> descriptor, and NumberOfOutputs outputs.
 *
 * A "stage" is the set of weights between two adjacent layers. For L hidden
 * layers there are L+1 stages (input->H[0], H[0]->H[1], ..., H[L-1]->O).
 *
 * Each source layer carries a bias neuron, so the weight count for stage k is
 * (LayerSize(k) + 1) * LayerSize(k+1). The Xavier fan-sum at stage k is
 * LayerSize(k) + LayerSize(k+1).
 */
template <std::size_t NumberOfInputs, typename HiddenLayersDesc, std::size_t NumberOfOutputs>
struct XavierStages;

template <std::size_t NumberOfInputs, std::size_t NumberOfOutputs, std::size_t... Sizes>
struct XavierStages<NumberOfInputs, HiddenLayers<Sizes...>, NumberOfOutputs>
{
    /** Number of stages: one more than the number of hidden layers. */
    static constexpr std::size_t Count = sizeof...(Sizes) + 1;

    /**
     * Width of layer k, where layer 0 is the input layer, layers 1..L are the
     * hidden layers in declaration order, and layer L+1 is the output layer.
     *
     * k must be in [0, Count]; no bounds check is performed.
     * The function-local constexpr table requires C++14 relaxed constexpr.
     */
    static constexpr std::size_t layerSize(const std::size_t k)
    {
        constexpr std::size_t sizes[] = { NumberOfInputs, Sizes..., NumberOfOutputs };
        return sizes[k];
    }

    /**
     * Number of weights in stage k: every neuron of layer k plus one bias
     * neuron connects to every neuron of layer k+1. k must be in [0, Count).
     */
    static constexpr std::size_t stageWeightCount(const std::size_t k)
    {
        return (layerSize(k) + 1) * layerSize(k + 1);
    }

    /**
     * Sum of the widths of the two layers joined by stage k (bias neurons are
     * not counted). k must be in [0, Count).
     */
    static constexpr std::size_t stageFanSum(const std::size_t k)
    {
        return layerSize(k) + layerSize(k + 1);
    }
};

/**
 * Builds HiddenLayers<Size, Size, ...> containing Count copies of Size.
 *
 * Each recursion step decrements Count and prepends one Size to the
 * Accumulated pack; the Count == 0 specialization emits the final type.
 */
template <std::size_t Count, std::size_t Size, std::size_t... Accumulated>
struct UniformHiddenLayersForXavier
{
    using type = typename UniformHiddenLayersForXavier<Count - 1, Size, Size, Accumulated...>::type;
};

template <std::size_t Size, std::size_t... Accumulated>
struct UniformHiddenLayersForXavier<0, Size, Accumulated...>
{
    using type = HiddenLayers<Accumulated...>;
};

} // namespace detail

/**
 * XavierWeightInitializerForLayers - Xavier weight initializer that supports
 * heterogeneous hidden layer widths via the same HiddenLayers<S0, S1, ...>
 * descriptor used by NeuralNetwork in neuralnet.hpp.
 *
 * Each call to generateUniformWeight()/generateNormalWeight() emits one weight
 * for the next outgoing connection, advancing through the layer pairs in the
 * same order the network's initializeWeights() chain visits them:
 * input layer -> first hidden, first hidden -> second hidden, ...,
 * last hidden -> output. Both regular neurons and per-layer bias neurons
 * contribute to each stage's weight count.
 *
 * After the last weight of the last stage has been emitted the position wraps
 * back to the first weight of the first stage.
 */
template <std::size_t NumberOfInputs, typename HiddenLayersDesc, std::size_t NumberOfOutputs>
struct XavierWeightInitializerForLayers;

template <std::size_t NumberOfInputs, std::size_t NumberOfOutputs, std::size_t... Sizes>
struct XavierWeightInitializerForLayers<NumberOfInputs, HiddenLayers<Sizes...>, NumberOfOutputs>
{
private:
    using Stages = detail::XavierStages<NumberOfInputs, HiddenLayers<Sizes...>, NumberOfOutputs>;

    std::size_t mWeightInStage; // index of the next weight within the current stage
    std::size_t mStage;         // index of the current stage, always < Stages::Count

    /**
     * Moves to the next weight; rolls over to the next stage when the current
     * stage is exhausted, and back to stage 0 after the final stage.
     */
    void advance()
    {
        ++mWeightInStage;
        if (mWeightInStage >= Stages::stageWeightCount(mStage))
        {
            mWeightInStage = 0;
            ++mStage;
            if (mStage >= Stages::Count)
            {
                mStage = 0;
            }
        }
    }

    /**
     * Shared sampling routine for both public generators.
     *
     * Draws one value from [-limit, +limit] using std::rand(), where
     * limit = sqrt(numerator / fanSum) and fanSum is the fan-sum of the
     * current stage, then advances to the next weight position.
     */
    double generateWeight(const double numerator)
    {
        const double fanSum = static_cast<double>(Stages::stageFanSum(mStage));
        const double limit = std::sqrt(numerator / fanSum);
        const double randomValue = ((static_cast<double>(std::rand()) / RAND_MAX) * 2.0 * limit) - limit;

        advance();

        return randomValue;
    }

public:
    /** Starts at the first weight of the first stage. */
    XavierWeightInitializerForLayers() : mWeightInStage(0), mStage(0)
    {
    }

    /**
     * Returns the next weight, drawn from [-limit, +limit] with
     * limit = sqrt(6 / fanSum) for the current stage.
     */
    double generateUniformWeight()
    {
        return generateWeight(6.0);
    }

    /**
     * Returns the next weight, drawn from [-limit, +limit] with
     * limit = sqrt(2 / fanSum) for the current stage.
     *
     * Note: despite the name, the value is sampled from a uniform range via
     * std::rand(), not from a normal distribution.
     */
    double generateNormalWeight()
    {
        return generateWeight(2.0);
    }
};

/**
 * XavierWeightInitializer - backward-compatible alias for the uniform-width
 * case. NumberOfNeuronsInHiddenLayers is used for every hidden layer.
 */
template <
    std::size_t NumberOfInputs,
    std::size_t NumberOfHiddenLayers,
    std::size_t NumberOfNeuronsInHiddenLayers,
    std::size_t NumberOfOutputs>
using XavierWeightInitializer = XavierWeightInitializerForLayers<
    NumberOfInputs,
    typename detail::UniformHiddenLayersForXavier<NumberOfHiddenLayers, NumberOfNeuronsInHiddenLayers>::type,
    NumberOfOutputs>;

} // namespace tinymind
0 commit comments