Skip to content

Commit 968f788

Browse files
authored
Merge pull request #6 from techpro-studio/dev
Polished
2 parents 87250fb + ea8d8cb commit 968f788

21 files changed

Lines changed: 302 additions & 230 deletions

CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ set(PRIVATE_HEADERS
4343
nntoolkitcore/core/ops.h
4444
nntoolkitcore/core/loop.h
4545
nntoolkitcore/core/memory.h
46-
nntoolkitcore/layers/shared.h)
46+
nntoolkitcore/layers/shared.h)
4747

4848
set(SOURCES
4949
nntoolkitcore/layers/activation.c
@@ -63,6 +63,9 @@ set(SOURCES
6363
nntoolkitcore/layers/private/recurrent_private.h
6464
nntoolkitcore/core/memory.c
6565
nntoolkitcore/layers/bidirectional.c
66+
nntoolkitcore/layers/private/weights_private.c
67+
nntoolkitcore/layers/private/weights_private.h
68+
nntoolkitcore/layers/recurrent.c
6669
)
6770

6871
if(APPLE)

nntoolkitcore/core/debug.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
// Copyright © 2020 Alex. All rights reserved.
77
//
88

9-
#ifndef degub_h
10-
#define degub_h
9+
#ifndef debug_h
10+
#define debug_h
1111

1212
#include <stdio.h>
1313
#if defined __cplusplus
@@ -24,4 +24,4 @@ void print_tensor(const float *tensor, int *shape, int shapeSize);
2424
}
2525
#endif
2626

27-
#endif /* degub_h */
27+
#endif /* debug_h */

nntoolkitcore/core/ops.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,12 @@
99
#ifndef ops_h
1010
#define ops_h
1111

12+
#include "stdbool.h"
1213

1314
#if defined __cplusplus
1415
extern "C" {
1516
#endif
1617

17-
#include "stdbool.h"
1818

1919
void op_vec_sub(const float *a, const float *b, float *result, int size);
2020

nntoolkitcore/layers/batch_norm.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,10 @@ void BatchNormDestroy(BatchNorm filter) {
9797
/**
 * Allocates a gradient container for a batch-norm layer.
 *
 * A single buffer is requested from f_malloc and carved into three regions,
 * in this order:
 *   d_beta  - feature_channels elements
 *   d_gamma - feature_channels elements
 *   d_x     - feature_channels * count * mini_batch_size elements
 * d_gamma and d_x point into the d_beta allocation, so d_beta is the only
 * pointer that owns memory.
 *
 * @param config           layer configuration (feature_channels, count are read).
 * @param training_config  training configuration (mini_batch_size is read).
 * @return the new gradient, or NULL when an allocation fails.
 */
BatchNormGradient *BatchNormGradientCreate(BatchNormConfig config, BatchNormTrainingConfig training_config) {
    BatchNormGradient *grad = malloc(sizeof *grad);
    if (grad == NULL) {
        return NULL;
    }
    int feat = config.feature_channels;
    int buff = 2 * feat + feat * config.count * training_config.mini_batch_size;
    grad->d_beta = f_malloc(buff);
    // NOTE(review): assumes f_malloc signals failure by returning NULL - confirm.
    if (grad->d_beta == NULL && buff > 0) {
        free(grad);
        return NULL;
    }
    grad->d_gamma = grad->d_beta + feat;
    grad->d_x = grad->d_gamma + feat;
    return grad;
}
106106

nntoolkitcore/layers/conv_1d.c

Lines changed: 39 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@
1010
#include "nntoolkitcore/core/loop.h"
1111
#include "nntoolkitcore/core/ops.h"
1212
#include "nntoolkitcore/core/memory.h"
13+
#include "nntoolkitcore/layers/private/weights_private.h"
1314

1415
typedef struct {
1516
ConvTrainingConfig config;
1617
float *input_transposed;
18+
DefaultGradient **batch_gradients;
1719
} Conv1dTrainingData;
1820

1921
typedef struct {
@@ -27,26 +29,43 @@ struct Conv1dStruct {
2729
Conv1dTrainingData *training_data;
2830
};
2931

32+
/**
 * Derives the weight-buffer layout of a 1-D convolution from its config.
 *
 *   w   = kernel_size * input_feature_channels * output_feature_channels
 *   b   = output_feature_channels
 *   sum = w + b
 *
 * @param config  convolution configuration.
 * @return the sizes, by value.
 */
ConvWeightsSize conv1d_weight_size_from_config(Conv1dConfig config) {
    const int bias_count = config.output_feature_channels;
    const int kernel_count = config.kernel_size * config.input_feature_channels * bias_count;
    DefaultWeightsSize size = {
        .w = kernel_count,
        .b = bias_count,
        .sum = kernel_count + bias_count
    };
    return size;
}
37+
38+
39+
/**
 * Allocates the inference scratch data for a 1-D convolution.
 *
 * The buffer holds input_size * input_feature_channels floats.
 *
 * @param config  convolution configuration.
 * @return the new inference data, or NULL when an allocation fails.
 */
static Conv1dInferenceData *conv1d_inference_data_create(Conv1dConfig config) {
    Conv1dInferenceData *data = malloc(sizeof *data);
    if (data == NULL) {
        return NULL;
    }
    // Widen before multiplying so the byte count is not computed in int.
    size_t bytes = (size_t) config.input_size * (size_t) config.input_feature_channels * sizeof(float);
    data->buffer = malloc(bytes);
    if (data->buffer == NULL && bytes != 0) {
        free(data);
        return NULL;
    }
    return data;
}
44+
3045
/**
 * Allocates the per-filter training state of a 1-D convolution.
 *
 * Allocates:
 *   - input_transposed: input_feature_channels * input_size * mini_batch_size floats
 *   - batch_gradients:  mini_batch_size gradient objects, each created with
 *                       the layer's weight sizes and 0 as the second argument
 *                       of default_gradient_create
 *
 * On any allocation failure everything acquired so far is released.
 *
 * @param config           convolution configuration.
 * @param training_config  training configuration (copied into the result).
 * @return the new training data, or NULL when an allocation fails.
 */
static Conv1dTrainingData *conv1d_training_data_create(Conv1dConfig config, ConvTrainingConfig training_config) {
    Conv1dTrainingData *data = malloc(sizeof *data);
    if (data == NULL) {
        return NULL;
    }
    int b = training_config.mini_batch_size;
    int created = 0;
    size_t input_bytes = (size_t) config.input_feature_channels * (size_t) config.input_size
                         * (size_t) b * sizeof(float);
    size_t table_bytes = (size_t) b * sizeof(DefaultGradient *);

    data->config = training_config;
    data->input_transposed = malloc(input_bytes);
    data->batch_gradients = malloc(table_bytes);
    if ((data->input_transposed == NULL && input_bytes != 0) ||
        (data->batch_gradients == NULL && table_bytes != 0)) {
        goto fail;
    }
    for (; created < b; ++created) {
        data->batch_gradients[created] = default_gradient_create(conv1d_weight_size_from_config(config), 0);
        // NOTE(review): assumes default_gradient_create returns NULL on failure - confirm.
        if (data->batch_gradients[created] == NULL) {
            goto fail;
        }
    }
    return data;

fail:
    // Unwind only the gradients that were actually created.
    while (created > 0) {
        default_gradient_destroy(data->batch_gradients[--created]);
    }
    free(data->batch_gradients);
    free(data->input_transposed);
    free(data);
    return NULL;
}
3757

3858
/**
 * Releases training data created by conv1d_training_data_create.
 *
 * Destroys each of the mini_batch_size per-sample gradients, then the pointer
 * table, the transposed-input buffer and the structure itself.
 *
 * @param training_data  data to release; NULL is accepted and ignored,
 *                       mirroring free(NULL).
 */
static void conv_training_data_destroy(Conv1dTrainingData *training_data) {
    if (training_data == NULL) {
        return;
    }
    for (int i = 0; i < training_data->config.mini_batch_size; ++i) {
        default_gradient_destroy(training_data->batch_gradients[i]);
    }
    free(training_data->batch_gradients);
    free(training_data->input_transposed);
    free(training_data);
}
4266

43-
static Conv1dInferenceData *conv1d_inference_data_create(Conv1dConfig config) {
44-
Conv1dInferenceData *data = malloc(sizeof(Conv1dInferenceData));
45-
data->buffer = malloc(config.input_size * config.input_feature_channels * sizeof(float));
46-
return data;
47-
}
4867

49-
static void conv1d_inference_data_destroy(Conv1dInferenceData* data) {
68+
/**
 * Releases inference data: the scratch buffer first, then the structure.
 *
 * @param data  data to release; NULL is accepted and ignored, mirroring free(NULL).
 */
static void conv1d_inference_data_destroy(Conv1dInferenceData *data) {
    if (data == NULL) {
        return;
    }
    free(data->buffer);
    free(data);
}
@@ -70,11 +89,7 @@ Conv1dConfig Conv1dConfigCreate(int input_feature_channels, int output_feature_c
7089
Conv1d conv1d_create(Conv1dConfig config) {
7190
Conv1d filter = malloc(sizeof(struct Conv1dStruct));
7291
filter->config = config;
73-
filter->weights = malloc(sizeof(ConvWeights));
74-
int W_size = config.kernel_size * config.input_feature_channels * config.output_feature_channels;
75-
int weights_size = W_size + config.output_feature_channels;
76-
filter->weights->W = f_malloc(weights_size);
77-
filter->weights->b = filter->weights->W + W_size;
92+
filter->weights = default_weights_create(conv1d_weight_size_from_config(config));
7893
filter->training_data = NULL;
7994
filter->inference_data = NULL;
8095
return filter;
@@ -140,19 +155,13 @@ int Conv1dApplyInference(Conv1d filter, const float *input, float *output) {
140155
}
141156

142157
/**
 * Creates a gradient object for a 1-D convolution.
 *
 * Delegates to default_gradient_create with the layer's weight sizes and
 * mini_batch_size * input_size * input_feature_channels as the second
 * argument (presumably the d_X element count - confirm against
 * weights_private.h).
 *
 * @param config           convolution configuration.
 * @param training_config  training configuration (mini_batch_size is read).
 * @return whatever default_gradient_create returns.
 */
ConvGradient *Conv1dCreateGradient(Conv1dConfig config, ConvTrainingConfig training_config) {
    const int d_x_size = training_config.mini_batch_size *
                         config.input_size * config.input_feature_channels;
    const ConvWeightsSize sizes = conv1d_weight_size_from_config(config);
    return default_gradient_create(sizes, d_x_size);
}
152162

153163
/**
 * Releases a convolution gradient.
 *
 * ConvGradient is an alias of DefaultGradient, so destruction is forwarded
 * to default_gradient_destroy.
 *
 * @param gradient  gradient obtained from Conv1dCreateGradient.
 */
void ConvGradientDestroy(ConvGradient *gradient)
{
    default_gradient_destroy(gradient);
}
157166

158167
int Conv1dApplyTrainingBatch(Conv1d filter, const float *input, float *output) {
@@ -174,13 +183,6 @@ int Conv1dApplyTrainingBatch(Conv1d filter, const float *input, float *output) {
174183
}
175184

176185
void Conv1dCalculateGradient(Conv1d filter, ConvGradient *gradient, const float *d_out) {
177-
int db_size = filter->config.output_feature_channels *
178-
filter->training_data->config.mini_batch_size;
179-
for (int o = 0; o < filter->config.output_size; ++o){
180-
op_vec_add(gradient->d_b, d_out + o * db_size, gradient->d_b, db_size);
181-
}
182-
183-
184186
int k_size = filter->config.kernel_size;
185187
int batch = filter->training_data->config.mini_batch_size;
186188
int in_ftrs = filter->config.input_feature_channels;
@@ -199,6 +201,12 @@ void Conv1dCalculateGradient(Conv1d filter, ConvGradient *gradient, const float
199201
// out_n d4 d5 d6
200202

201203
for (int b = 0; b < batch; ++b) {
204+
//db
205+
float *db_batched = filter->training_data->batch_gradients[b]->d_b;
206+
for (int o = 0; o < filter->config.output_size; ++o){
207+
op_vec_add(db_batched, d_out + o * out_ftrs + b * out_size, db_batched, out_ftrs);
208+
}
209+
202210
for (int out_f = 0; out_f < out_ftrs; ++out_f) {
203211
for (int out_n = 0; out_n < filter->config.output_size; ++out_n) {
204212

@@ -219,7 +227,7 @@ void Conv1dCalculateGradient(Conv1d filter, ConvGradient *gradient, const float
219227

220228
float d_kernel[k_size];
221229
op_vec_mul_sc(row_ptr, d_o, d_kernel, k_size);
222-
float *d_W = gradient->d_W + W_size * b + weights_offset;
230+
float *d_W = filter->training_data->batch_gradients[b]->d_W + weights_offset;
223231
op_vec_add(d_W, d_kernel, d_W, k_size);
224232

225233
// d_X;
@@ -233,6 +241,7 @@ void Conv1dCalculateGradient(Conv1d filter, ConvGradient *gradient, const float
233241
}
234242
op_mat_transp(d_x_transposed + b * inp_size, gradient->d_X + b * inp_size, filter->config.input_size, in_ftrs);
235243
}
244+
default_gradient_sum(filter->training_data->batch_gradients, gradient, conv1d_weight_size_from_config(filter->config), batch);
236245
}
237246

238247

nntoolkitcore/layers/conv_1d.h

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,9 @@ typedef struct {
3030

3131
Conv1dConfig Conv1dConfigCreate(int input_feature_channels, int output_feature_channels, int kernel_size, int stride, int inputSize);
3232

33-
typedef struct {
34-
float *W;
35-
float *b;
36-
} ConvWeights;
37-
38-
typedef struct {
39-
float *d_W;
40-
float *d_b;
41-
float *d_X;
42-
} ConvGradient;
33+
typedef DefaultWeights ConvWeights;
34+
typedef DefaultWeightsSize ConvWeightsSize;
35+
typedef DefaultGradient ConvGradient;
4336

4437
typedef struct Conv1dStruct* Conv1d;
4538

nntoolkitcore/layers/dense.c

Lines changed: 42 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10,29 +10,46 @@
1010
#include "nntoolkitcore/core/ops.h"
1111
#include "nntoolkitcore/core/memory.h"
1212
#include "nntoolkitcore/core/loop.h"
13+
#include "nntoolkitcore/layers/private/weights_private.h"
1314

1415
typedef struct {
1516
DenseTrainingConfig config;
1617
float *x;
1718
float *z;
1819
float *a;
1920
float *dz;
21+
DefaultGradient **batch_gradients;
2022
} DenseTrainingData;
2123

24+
/**
 * Derives the weight-buffer layout of a dense layer from its config.
 *
 *   w   = input_size * output_size
 *   b   = output_size
 *   sum = w + b
 *
 * @param config  dense layer configuration.
 * @return the sizes, by value.
 */
DenseWeightsSize dense_weight_size_from_config(DenseConfig config) {
    const int bias_count = config.output_size;
    const int matrix_count = config.input_size * bias_count;
    DefaultWeightsSize size = {
        .w = matrix_count,
        .b = bias_count,
        .sum = matrix_count + bias_count
    };
    return size;
}
29+
2230
/**
 * Allocates the training state of a dense layer.
 *
 * One buffer from f_malloc is split into four regions, in this order:
 *   x  - input_size  * mini_batch_size elements
 *   z  - output_size * mini_batch_size elements
 *   a  - output_size * mini_batch_size elements
 *   dz - output_size * mini_batch_size elements
 * Only x owns memory; z, a and dz point into the same allocation.
 * In addition, mini_batch_size gradient objects are created, each with the
 * layer's weight sizes and 0 as the second argument of default_gradient_create.
 *
 * On any allocation failure everything acquired so far is released.
 *
 * @param config           dense layer configuration.
 * @param training_config  training configuration (copied into the result).
 * @return the new training data, or NULL when an allocation fails.
 */
DenseTrainingData *dense_training_data_create(DenseConfig config, DenseTrainingConfig training_config) {
    DenseTrainingData *data = malloc(sizeof *data);
    if (data == NULL) {
        return NULL;
    }
    data->config = training_config;
    int b = training_config.mini_batch_size;
    int x_size = config.input_size * b;
    int z_size = config.output_size * b;
    int buff_size = x_size + 3 * z_size;
    int created = 0;
    size_t table_bytes = (size_t) b * sizeof(DefaultGradient *);

    data->x = f_malloc(buff_size);
    data->batch_gradients = malloc(table_bytes);
    // NOTE(review): assumes f_malloc signals failure by returning NULL - confirm.
    if ((data->x == NULL && buff_size > 0) ||
        (data->batch_gradients == NULL && table_bytes != 0)) {
        goto fail;
    }
    data->z = data->x + x_size;
    data->a = data->z + z_size;
    data->dz = data->a + z_size;

    for (; created < b; ++created) {
        data->batch_gradients[created] = default_gradient_create(dense_weight_size_from_config(config), 0);
        // NOTE(review): assumes default_gradient_create returns NULL on failure - confirm.
        if (data->batch_gradients[created] == NULL) {
            goto fail;
        }
    }
    return data;

fail:
    // Unwind only the gradients that were actually created.
    while (created > 0) {
        default_gradient_destroy(data->batch_gradients[--created]);
    }
    free(data->batch_gradients);
    free(data->x);
    free(data);
    return NULL;
}
3447

3548
/**
 * Releases training data created by dense_training_data_create.
 *
 * Destroys each of the mini_batch_size per-sample gradients, then the pointer
 * table, the buffer that starts at x, and the structure itself.
 *
 * @param data  data to release; NULL is accepted and ignored, mirroring free(NULL).
 */
void dense_training_data_destroy(DenseTrainingData *data) {
    if (data == NULL) {
        return;
    }
    for (int i = 0; i < data->config.mini_batch_size; ++i) {
        default_gradient_destroy(data->batch_gradients[i]);
    }
    free(data->batch_gradients);
    free(data->x);
    free(data);
}
@@ -55,20 +72,18 @@ DenseConfig DenseConfigCreate(int input_size, int output_size, ActivationFunctio
5572
return config;
5673
}
5774

75+
76+
5877
/**
 * Creates a dense layer that carries no training state.
 *
 * The weights are allocated through default_weights_create using the sizes
 * from dense_weight_size_from_config; training_data is set to NULL.
 *
 * @param config  dense layer configuration (copied into the filter).
 * @return the new filter, or NULL when an allocation fails.
 */
Dense DenseCreateForInference(DenseConfig config) {
    Dense filter = malloc(sizeof *filter);
    if (filter == NULL) {
        return NULL;
    }
    filter->config = config;
    filter->training_data = NULL;
    filter->weights = default_weights_create(dense_weight_size_from_config(config));
    // NOTE(review): assumes default_weights_create returns NULL on failure - confirm.
    if (filter->weights == NULL) {
        free(filter);
        return NULL;
    }
    return filter;
}
6884

6985
void DenseDestroy(Dense filter) {
70-
free(filter->weights->W);
71-
free(filter->weights);
86+
default_weights_destroy(filter->weights);
7287
if (filter->training_data) {
7388
dense_training_data_destroy(filter->training_data);
7489
}
@@ -82,19 +97,22 @@ Dense DenseCreateForTraining(DenseConfig config, DenseTrainingConfig training_co
8297
}
8398

8499
/**
 * Creates a gradient object for a dense layer.
 *
 * Delegates to default_gradient_create with the layer's weight sizes and
 * mini_batch_size * input_size as the second argument (presumably the d_X
 * element count - confirm against weights_private.h).
 *
 * @param config           dense layer configuration.
 * @param training_config  training configuration (mini_batch_size is read).
 * @return whatever default_gradient_create returns.
 */
DenseGradient *DenseGradientCreate(DenseConfig config, DenseTrainingConfig training_config) {
    const int d_x_size = training_config.mini_batch_size * config.input_size;
    const DenseWeightsSize sizes = dense_weight_size_from_config(config);
    return default_gradient_create(sizes, d_x_size);
}
94105

106+
/**
 * Creates a gradient sized for an existing dense filter.
 *
 * Uses the filter's own config together with the training config stored in
 * its training data.
 *
 * @param dense  filter to size the gradient for; may be NULL.
 * @return a new gradient from DenseGradientCreate, or NULL when `dense` is
 *         NULL or has no training data.
 */
DenseGradient *DenseGradientCreateFromFilter(Dense dense) {
    if (dense == NULL || dense->training_data == NULL) {
        return NULL;
    }
    return DenseGradientCreate(dense->config, dense->training_data->config);
}
112+
113+
95114
/**
 * Releases a dense-layer gradient.
 *
 * DenseGradient is an alias of DefaultGradient, so destruction is forwarded
 * to default_gradient_destroy.
 *
 * @param gradient  gradient obtained from DenseGradientCreate.
 */
void DenseGradientDestroy(DenseGradient *gradient)
{
    default_gradient_destroy(gradient);
}
99117

100118
DenseConfig DenseGetConfig(Dense filter) {
@@ -146,7 +164,8 @@ int DenseApplyTrainingBatch(Dense filter, const float *input, float *output) {
146164
void DenseCalculateGradient(Dense filter, DenseGradient *gradient, float *d_out) {
147165
int out = filter->config.output_size;
148166
int in = filter->config.input_size;
149-
P_LOOP_START(filter->training_data->config.mini_batch_size, b)
167+
int batch = filter->training_data->config.mini_batch_size;
168+
P_LOOP_START(batch, b)
150169
// dz = d_out * d_activation ?? 1;
151170
float *dz = filter->training_data->dz + b * out;
152171
if (filter->config.activation) {
@@ -156,11 +175,13 @@ void DenseCalculateGradient(Dense filter, DenseGradient *gradient, float *d_out)
156175
f_copy(dz, d_out + b * out, out);
157176
}
158177
//db = dz;
159-
f_copy(gradient->d_b + b * out, dz, out);
178+
f_copy(filter->training_data->batch_gradients[b]->d_b, dz, out);
160179
// DW = dz * X;
161-
op_mat_mul(filter->training_data->x + b * in, dz, gradient->d_W + b * in * out, in, out, 1);
180+
op_mat_mul(filter->training_data->x + b * in, dz, filter->training_data->batch_gradients[b]->d_W, in, out, 1);
162181
// DX = dz * W;
163182
op_mat_mul(filter->weights->W, dz, gradient->d_X + b * in, in, 1, out);
164183
P_LOOP_END
184+
default_gradient_sum(filter->training_data->batch_gradients, gradient, dense_weight_size_from_config(filter->config), batch);
165185
}
166186

187+

nntoolkitcore/layers/dense.h

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,14 @@
1313
#include "activation.h"
1414
#include "shared.h"
1515

16+
1617
#if defined __cplusplus
1718
extern "C" {
1819
#endif
1920

20-
typedef struct {
21-
float *W;
22-
float *b;
23-
} DenseWeights;
24-
25-
typedef struct {
26-
float *d_W;
27-
float *d_b;
28-
float *d_X;
29-
} DenseGradient;
21+
typedef DefaultWeights DenseWeights;
22+
typedef DefaultWeightsSize DenseWeightsSize;
23+
typedef DefaultGradient DenseGradient;
3024

3125
typedef DefaultTrainingConfig DenseTrainingConfig;
3226

@@ -42,6 +36,8 @@ void DenseGradientDestroy(DenseGradient *gradient);
4236

4337
typedef struct DenseStruct* Dense;
4438

39+
DenseGradient* DenseGradientCreateFromFilter(Dense dense);
40+
4541
DenseWeights* DenseGetWeights(Dense filter);
4642

4743
DenseConfig DenseConfigCreate(int input_size, int output_size, ActivationFunction activation);

0 commit comments

Comments
 (0)