Skip to content

Commit 1f6daa7

Browse files
danmcleran and claude committed
Support heterogeneous hidden layers in Xavier initializer
Re-parameterize the Xavier initializer on the same HiddenLayers<S0, S1, ...> descriptor that NeuralNetwork uses, and switch the advance counter from per-neuron to per-weight so that the layer-pair limits flip on actual weight-stage boundaries, including the bias-neuron contributions. The old four-parameter template signature is preserved as a backward-compatible alias.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 4b2b538 commit 1f6daa7

2 files changed

Lines changed: 144 additions & 119 deletions

File tree

cpp/xavier.hpp

Lines changed: 103 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -23,160 +23,144 @@
2323
#pragma once
2424

2525
#include <cmath>
26+
#include <cstddef>
27+
#include <cstdlib>
2628

2729
namespace tinymind {
2830

29-
enum layer_e
31+
// Forward declaration: definition lives in neuralnet.hpp.
32+
template<size_t...> struct HiddenLayers;
33+
34+
namespace detail {
35+
36+
/**
37+
* XavierStages computes per-stage metrics for a network with NumberOfInputs
38+
* inputs, the given HiddenLayers<...> descriptor, and NumberOfOutputs outputs.
39+
*
40+
* A "stage" is the set of weights between two adjacent layers. For L hidden
41+
* layers there are L+1 stages (input->H[0], H[0]->H[1], ..., H[L-1]->O).
42+
*
43+
* Each source layer carries a bias neuron, so the weight count for stage k is
44+
* (LayerSize(k) + 1) * LayerSize(k+1). The Xavier fan-sum at stage k is
45+
* LayerSize(k) + LayerSize(k+1).
46+
*/
47+
template<size_t NumberOfInputs, typename HiddenLayersDesc, size_t NumberOfOutputs>
48+
struct XavierStages;
49+
50+
template<size_t NumberOfInputs, size_t NumberOfOutputs, size_t... Sizes>
51+
struct XavierStages<NumberOfInputs, HiddenLayers<Sizes...>, NumberOfOutputs>
52+
{
53+
static constexpr size_t Count = sizeof...(Sizes) + 1;
54+
55+
static constexpr size_t layerSize(const size_t k)
56+
{
57+
constexpr size_t sizes[] = { NumberOfInputs, Sizes..., NumberOfOutputs };
58+
return sizes[k];
59+
}
60+
61+
static constexpr size_t stageWeightCount(const size_t k)
62+
{
63+
return (layerSize(k) + 1) * layerSize(k + 1);
64+
}
65+
66+
static constexpr size_t stageFanSum(const size_t k)
67+
{
68+
return layerSize(k) + layerSize(k + 1);
69+
}
70+
};
71+
72+
template<size_t Count, size_t Size, size_t... Accumulated>
73+
struct UniformHiddenLayersForXavier
74+
{
75+
typedef typename UniformHiddenLayersForXavier<Count - 1, Size, Size, Accumulated...>::type type;
76+
};
77+
78+
template<size_t Size, size_t... Accumulated>
79+
struct UniformHiddenLayersForXavier<0, Size, Accumulated...>
3080
{
31-
INVALID = 0,
32-
INPUT_LAYER,
33-
HIDDEN_LAYER,
34-
OUTPUT_LAYER
81+
typedef HiddenLayers<Accumulated...> type;
3582
};
3683

84+
} // namespace detail
85+
3786
/**
38-
* The XavierWeightInitializer class implements the Xavier weight initialization algorithm.
39-
* It generates weights for neural network connections based on the number of inputs and outputs
40-
* of each neuron, ensuring that the weights are initialized in a way that helps maintain
41-
* the variance of activations across layers.
42-
*
43-
* This is very tied to the neural network initializtion order, so be careful if changing that.
44-
* It was done this way to minimize the touch to existing code.
87+
* XavierWeightInitializerForLayers — Xavier weight initializer that supports
88+
* heterogeneous hidden layer widths via the same HiddenLayers<S0, S1, ...>
89+
* descriptor used by NeuralNetwork in neuralnet.hpp.
90+
*
91+
* Each call to generateUniformWeight()/generateNormalWeight() emits one weight
92+
* for the next outgoing connection, advancing through the layer pairs in the
93+
* same order the network's initializeWeights() chain visits them:
94+
* input layer -> first hidden, first hidden -> second hidden, ...,
95+
* last hidden -> output. Both regular neurons and per-layer bias neurons
96+
* contribute to each stage's weight count.
4597
*/
46-
template<
47-
size_t NumberOfInputs,
48-
size_t NumberOfHiddenLayers,
49-
size_t NumberOfNeuronsInHiddenLayers,
50-
size_t NumberOfOutputs>
51-
struct XavierWeightInitializer
98+
template<size_t NumberOfInputs, typename HiddenLayersDesc, size_t NumberOfOutputs>
99+
struct XavierWeightInitializerForLayers;
100+
101+
template<size_t NumberOfInputs, size_t NumberOfOutputs, size_t... Sizes>
102+
struct XavierWeightInitializerForLayers<NumberOfInputs, HiddenLayers<Sizes...>, NumberOfOutputs>
52103
{
53104
private:
54-
static const unsigned NumberOfNeurons = (NumberOfInputs + (NumberOfHiddenLayers * NumberOfNeuronsInHiddenLayers) + NumberOfOutputs);
55-
static const unsigned FirstHiddenNeuron = NumberOfInputs;
56-
static const unsigned FirstOuputNeuron = (NumberOfInputs + (NumberOfHiddenLayers * NumberOfNeuronsInHiddenLayers));
57-
58-
unsigned neuron;
59-
layer_e previousLayer;
60-
layer_e currentLayer;
61-
layer_e nextLayer;
62-
unsigned numInputs;
63-
unsigned numOutputs;
64-
65-
void advanceNeuron()
66-
{
67-
++neuron;
68-
if (neuron >= NumberOfNeurons)
69-
{
70-
// reset for next call
71-
neuron = 0;
72-
previousLayer = layer_e::INVALID;
73-
currentLayer = layer_e::INPUT_LAYER;
74-
nextLayer = layer_e::HIDDEN_LAYER;
75-
numInputs = NumberOfInputs;
76-
numOutputs = NumberOfNeuronsInHiddenLayers;
77-
}
78-
else
79-
{
80-
if (neuron >= FirstOuputNeuron)
81-
{
82-
currentLayer = layer_e::OUTPUT_LAYER;
83-
previousLayer = layer_e::HIDDEN_LAYER;
84-
nextLayer = layer_e::INVALID;
85-
}
86-
else
87-
{
88-
if ((neuron >= FirstHiddenNeuron) && (neuron < FirstOuputNeuron))
89-
{
90-
currentLayer = layer_e::HIDDEN_LAYER;
91-
92-
if (neuron < (NumberOfInputs + NumberOfNeuronsInHiddenLayers))
93-
{
94-
previousLayer = layer_e::INPUT_LAYER;
95-
}
96-
else
97-
{
98-
previousLayer = layer_e::HIDDEN_LAYER;
99-
}
100-
101-
if (neuron + NumberOfNeuronsInHiddenLayers >= FirstOuputNeuron)
102-
{
103-
nextLayer = layer_e::OUTPUT_LAYER;
104-
}
105-
else
106-
{
107-
nextLayer = layer_e::HIDDEN_LAYER;
108-
}
109-
}
110-
}
111-
}
112-
}
105+
typedef detail::XavierStages<NumberOfInputs, HiddenLayers<Sizes...>, NumberOfOutputs> Stages;
106+
107+
size_t mWeightInStage;
108+
size_t mStage;
113109

114-
void calculateInputsAndOutputs()
110+
void advance()
115111
{
116-
if (currentLayer == layer_e::INPUT_LAYER)
112+
++mWeightInStage;
113+
if (mWeightInStage >= Stages::stageWeightCount(mStage))
117114
{
118-
numInputs = NumberOfInputs;
119-
numOutputs = NumberOfNeuronsInHiddenLayers;
120-
}
121-
else if (currentLayer == layer_e::HIDDEN_LAYER)
122-
{
123-
if (previousLayer == layer_e::INPUT_LAYER)
124-
{
125-
numInputs = NumberOfInputs;
126-
}
127-
else
128-
{
129-
numInputs = NumberOfNeuronsInHiddenLayers;
130-
}
131-
132-
if (nextLayer == layer_e::OUTPUT_LAYER)
115+
mWeightInStage = 0;
116+
++mStage;
117+
if (mStage >= Stages::Count)
133118
{
134-
numOutputs = NumberOfOutputs;
119+
mStage = 0;
135120
}
136-
else
137-
{
138-
numOutputs = NumberOfNeuronsInHiddenLayers;
139-
}
140-
}
141-
else
142-
{
143-
numInputs = NumberOfNeuronsInHiddenLayers;
144-
numOutputs = NumberOfOutputs;
145121
}
146122
}
147123

148124
public:
149-
XavierWeightInitializer() : neuron(0),
150-
previousLayer(layer_e::INVALID),
151-
currentLayer(layer_e::INPUT_LAYER),
152-
nextLayer(layer_e::HIDDEN_LAYER),
153-
numInputs(0),
154-
numOutputs(0)
125+
XavierWeightInitializerForLayers() : mWeightInStage(0), mStage(0)
155126
{
156127
}
157128

158129
double generateUniformWeight()
159130
{
160-
calculateInputsAndOutputs();
161-
162-
const double limit = std::sqrt(6.0 / (static_cast<double>(numInputs + numOutputs)));
131+
const double fanSum = static_cast<double>(Stages::stageFanSum(mStage));
132+
const double limit = std::sqrt(6.0 / fanSum);
163133
const double randomValue = ((static_cast<double>(rand()) / RAND_MAX) * 2.0 * limit) - limit;
164134

165-
advanceNeuron();
135+
advance();
166136

167137
return randomValue;
168138
}
169139

170140
double generateNormalWeight()
171141
{
172-
calculateInputsAndOutputs();
173-
174-
const double limit = std::sqrt(2.0 / (static_cast<double>(numInputs + numOutputs)));
142+
const double fanSum = static_cast<double>(Stages::stageFanSum(mStage));
143+
const double limit = std::sqrt(2.0 / fanSum);
175144
const double randomValue = ((static_cast<double>(rand()) / RAND_MAX) * 2.0 * limit) - limit;
176145

177-
advanceNeuron();
146+
advance();
178147

179148
return randomValue;
180149
}
181150
};
182-
}
151+
152+
/**
153+
* XavierWeightInitializer — backward-compatible alias for the uniform-width
154+
* case. NumberOfNeuronsInHiddenLayers is used for every hidden layer.
155+
*/
156+
template<
157+
size_t NumberOfInputs,
158+
size_t NumberOfHiddenLayers,
159+
size_t NumberOfNeuronsInHiddenLayers,
160+
size_t NumberOfOutputs>
161+
using XavierWeightInitializer = XavierWeightInitializerForLayers<
162+
NumberOfInputs,
163+
typename detail::UniformHiddenLayersForXavier<NumberOfHiddenLayers, NumberOfNeuronsInHiddenLayers>::type,
164+
NumberOfOutputs>;
165+
166+
} // namespace tinymind

unit_test/nn/nn_unit_test.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,22 @@ struct XavierNormalRandomNumberGenerator
250250
}
251251
};
252252

253+
template<typename ValueType, size_t NUMBER_OF_INPUTS, typename HiddenLayersDesc, size_t NUMBER_OF_OUTPUTS>
254+
struct XavierUniformHeterogeneousRandomNumberGenerator
255+
{
256+
typedef tinymind::XavierWeightInitializerForLayers<NUMBER_OF_INPUTS, HiddenLayersDesc, NUMBER_OF_OUTPUTS> XavierWeightInitializerType;
257+
typedef tinymind::ValueConverter<double, ValueType> WeightConverterPolicy;
258+
259+
static ValueType generateRandomWeight()
260+
{
261+
static XavierWeightInitializerType xavierWeightInitializer;
262+
const double temp = xavierWeightInitializer.generateUniformWeight();
263+
const ValueType weight = WeightConverterPolicy::convertToDestinationType(temp);
264+
265+
return weight;
266+
}
267+
};
268+
253269
template<
254270
typename ValueType,
255271
template<typename> class TransferFunctionRandomNumberGeneratorPolicy,
@@ -1072,6 +1088,31 @@ BOOST_AUTO_TEST_CASE(test_case_fixedpoint_nn_xor_xavier_normal)
10721088
testFixedPointNeuralNetwork_Xor(nn, path);
10731089
}
10741090

1091+
BOOST_AUTO_TEST_CASE(test_case_fixedpoint_nn_xor_xavier_heterogeneous)
1092+
{
1093+
static const size_t NUMBER_OF_INPUTS = 2;
1094+
static const size_t NUMBER_OF_OUTPUTS = 1;
1095+
static const size_t NUMBER_OF_FIXED_BITS = 8;
1096+
static const size_t NUMBER_OF_FRACTIONAL_BITS = 8;
1097+
typedef tinymind::QValue<NUMBER_OF_FIXED_BITS, NUMBER_OF_FRACTIONAL_BITS, true, tinymind::RoundUpPolicy> ValueType;
1098+
typedef tinymind::HiddenLayers<6, 4> HiddenLayersDesc;
1099+
typedef tinymind::FixedPointTransferFunctions<
1100+
ValueType,
1101+
XavierUniformHeterogeneousRandomNumberGenerator<ValueType, NUMBER_OF_INPUTS, HiddenLayersDesc, NUMBER_OF_OUTPUTS>,
1102+
tinymind::TanhActivationPolicy<ValueType>,
1103+
tinymind::TanhActivationPolicy<ValueType>> TransferFunctionsType;
1104+
typedef tinymind::NeuralNetwork< ValueType,
1105+
NUMBER_OF_INPUTS,
1106+
HiddenLayersDesc,
1107+
NUMBER_OF_OUTPUTS,
1108+
TransferFunctionsType> FixedPointHeterogeneousNetworkType;
1109+
srand(RANDOM_SEED);
1110+
char const* const path = "output/nn_fixed_xor_xavier_heterogeneous.txt";
1111+
FixedPointHeterogeneousNetworkType nn;
1112+
1113+
testFixedPointNeuralNetwork_Xor(nn, path);
1114+
}
1115+
10751116
BOOST_AUTO_TEST_CASE(test_case_fixedpoint_nn_xor_nn_copy)
10761117
{
10771118
static const size_t NUMBER_OF_INPUTS = 2;

0 commit comments

Comments
 (0)