diff --git a/python/tinyProp.py b/python/tinyProp.py
index c643979..44ee2f0 100644
--- a/python/tinyProp.py
+++ b/python/tinyProp.py
@@ -2,7 +2,7 @@
import torch.nn as nn
import torch.nn.functional as F
from typing import Union
-from torch.nn.common_types import _size_2_t
+from torch.nn.common_types import _size_1_t, _size_2_t
# classes to hold TinyProp parameters on Net and Layer scope
@@ -55,7 +55,22 @@ def selectGradients(self, grad_output, params):
idx = torch.hstack(idx)
val = torch.cat(val)
return idx, val
-
+
+
+#========== Helper functions ==========#
+
+def _apply_tinyprop_mask(tp_info: "TinyPropLayer", grad_output: torch.Tensor, tp_params: TinyPropParams) -> torch.Tensor:
+ """Apply the TinyProp gradient selection to the gradient tensor."""
+
+ flattened = torch.flatten(grad_output, start_dim=1)
+ indices, values = tp_info.selectGradients(flattened, tp_params)
+
+ masked_flat = torch.zeros_like(flattened)
+ if values.numel() > 0:
+ masked_flat[indices[0], indices[1]] = values
+
+ return masked_flat.view_as(grad_output)
+
#========== LINEAR ==========#
@@ -113,9 +128,79 @@ def forward(self, input):
return SparseLinear.apply(input, self.weight, self.tpParams, self, self.bias)
-#========== CONVOLUTION ==========#
+#========== CONVOLUTION 1D ==========#
+
+class SparseConv1d(torch.autograd.Function):
+
+ @staticmethod
+ def forward(ctx, input, weight, bias, stride, padding, dilation, groups, padding_mode,
+ _reversed_padding_repeated_twice, tpParams: TinyPropParams, tpInfo: TinyPropLayer):
+ ctx.save_for_backward(input, weight, bias)
+
+ ctx.stride = stride
+ ctx.padding = padding
+ ctx.dilation = dilation
+ ctx.groups = groups
+ ctx.tpParams = tpParams
+ ctx.tpInfo = tpInfo
+
+ if padding_mode != 'zeros':
+ return F.conv1d(F.pad(input, _reversed_padding_repeated_twice, mode=padding_mode),
+ weight, bias, stride, 0, dilation, groups)
+ return F.conv1d(input, weight, bias, stride, padding, dilation, groups)
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ input, weight, bias = ctx.saved_tensors
+
+ grad_input = grad_weight = grad_bias = None
+
+ masked_grad = _apply_tinyprop_mask(ctx.tpInfo, grad_output, ctx.tpParams)
+
+ if ctx.needs_input_grad[0]:
+ grad_input = torch.nn.grad.conv1d_input(input.shape, weight, masked_grad, ctx.stride,
+ ctx.padding, ctx.dilation, ctx.groups)
+ if ctx.needs_input_grad[1]:
+ grad_weight = torch.nn.grad.conv1d_weight(input, weight.shape, masked_grad, ctx.stride,
+ ctx.padding, ctx.dilation, ctx.groups)
+ if bias is not None and ctx.needs_input_grad[2]:
+ sum_dims = (0,) + tuple(range(2, masked_grad.dim()))
+ grad_bias = masked_grad.sum(dim=sum_dims)
+
+ return grad_input, grad_weight, grad_bias, None, None, None, None, None, None, None, None
+
+
+class TinyPropConv1d(TinyPropLayer, nn.Conv1d):
+ def __init__(self,
+ in_channels: int,
+ out_channels: int,
+ kernel_size: _size_1_t,
+ tinyPropParams: TinyPropParams,
+ layer_number: int,
+ stride: _size_1_t = 1,
+ padding: Union[str, _size_1_t] = 0,
+ dilation: _size_1_t = 1,
+ groups: int = 1,
+ bias: bool = True,
+ padding_mode: str = 'zeros',
+ device=None,
+ dtype=None):
+ TinyPropLayer.__init__(self, tinyPropParams.number_of_layers - layer_number)
+ nn.Conv1d.__init__(self, in_channels, out_channels, kernel_size, stride=stride, padding=padding,
+ dilation=dilation, groups=groups, bias=bias, padding_mode=padding_mode,
+ device=device, dtype=dtype)
+
+ self.tpParams = tinyPropParams
-class SparseConv2d(torch.autograd.Function):
+ def forward(self, input):
+ return SparseConv1d.apply(input, self.weight, self.bias, self.stride, self.padding, self.dilation,
+ self.groups, self.padding_mode, self._reversed_padding_repeated_twice,
+ self.tpParams, self)
+
+
+#========== CONVOLUTION 2D ==========#
+
+class SparseConv2d(torch.autograd.Function):
# keep in mind that convolution operations DO NOT reduce the batchSize (in contrast to matmul)!
@staticmethod
@@ -150,44 +235,19 @@ def backward(ctx, grad_output):
# Initialize all gradients w.r.t. inputs to None
grad_input = grad_weight = grad_bias = None
- # This is the TinyProp part: conv can't handle sparse matrices so I have to build a masked version based on the selected gradients
- out_ch = grad_output.shape[1]
- out_width = grad_output.shape[2]
- out_height = grad_output.shape[3]
- # flatten elements to work with the gradient selection
- flattened = torch.flatten(grad_output, start_dim=1)
- indices, values = ctx.tpInfo.selectGradients(flattened, ctx.tpParams)
- # mask grad_output by reinitializing with zeros
- grad_output = torch.zeros(flattened.size())
- # then loop over and set all selected gradient entries
- for i in range(indices.size(1)):
- grad_output[indices[0, i], indices[1, i]] = values[i]
- # undo the flattening
- grad_output = grad_output.view(-1, out_ch, out_width, out_height).to(weight.device)
-
-
- # proceed with layer specific computations
+ masked_grad = _apply_tinyprop_mask(ctx.tpInfo, grad_output, ctx.tpParams)
+
if ctx.needs_input_grad[0]:
- # can be solved by deconvolving grad_output with weight
- grad_input = F.conv_transpose2d(grad_output, weight, None, ctx.stride, ctx.padding, groups=ctx.groups, dilation=ctx.dilation)
+ grad_input = torch.nn.grad.conv2d_input(input.shape, weight, masked_grad, ctx.stride,
+ ctx.padding, ctx.dilation, ctx.groups)
if ctx.needs_input_grad[1]:
- # can be solved by convolving input with grad_output, but the resulting grad_weight is 5d which the conv function can't handle.
- # I mitigate this problem by slicing the input by input channel. I can then do the convolution with this reduced dimension, where
- # I can process the batch-dimension as input channel. Later grad_weight is constructed from these sub-convolutions
-
- # use batch-dimension as in-channel [out, b, w, h] = [out, in, w, h]
- permutated = grad_output.permute(1, 0, 2, 3)
- # dismantle real input-channel
- input_channels = torch.unbind(input, dim=1)
- res = []
- for channel in input_channels:
- res.append(F.conv2d(channel, permutated, None, ctx.stride, ctx.padding, groups=ctx.groups, dilation=ctx.dilation))
- grad_weight = torch.stack(res, dim=0).permute(1, 0, 2, 3)
+ grad_weight = torch.nn.grad.conv2d_weight(input, weight.shape, masked_grad, ctx.stride,
+ ctx.padding, ctx.dilation, ctx.groups)
if bias is not None and ctx.needs_input_grad[2]:
- # simply sum up all elements over width, height
- grad_bias = torch.sum(grad_output, dim=(2,3))
+ sum_dims = (0,) + tuple(range(2, masked_grad.dim()))
+ grad_bias = masked_grad.sum(dim=sum_dims)
return grad_input, grad_weight, grad_bias, None, None, None, None, None, None, None, None
@@ -202,11 +262,15 @@ def __init__(self,
stride: _size_2_t = 1,
padding: Union[str, _size_2_t] = 0,
dilation: _size_2_t = 1,
+ groups: int = 1,
bias: bool = True,
+ padding_mode: str = 'zeros',
device = None,
dtype = None):
TinyPropLayer.__init__(self, tinyPropParams.number_of_layers - layer_number)
- nn.Conv2d.__init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, 1, bias, device=device, dtype=dtype)
+ nn.Conv2d.__init__(self, in_channels, out_channels, kernel_size, stride=stride, padding=padding,
+ dilation=dilation, groups=groups, bias=bias, padding_mode=padding_mode,
+ device=device, dtype=dtype)
# Saving variables like this will pass it by REFERENCE, so changes
# made in backwards are reflected in layer
diff --git a/src/aifes.h b/src/aifes.h
index a078f8f..5551121 100644
--- a/src/aifes.h
+++ b/src/aifes.h
@@ -43,6 +43,8 @@ extern "C" {
// Include the layer base implementations
#include "basic/base/ailayer/ailayer_dense.h"
+#include "basic/base/ailayer/ailayer_conv1d.h"
+#include "basic/base/ailayer/ailayer_conv2d.h"
#include "basic/base/ailayer/ailayer_input.h"
#include "basic/base/ailayer/ailayer_relu.h"
#include "basic/base/ailayer/ailayer_leaky_relu.h"
@@ -68,6 +70,8 @@ extern "C" {
// Include the layers in default implementation
#include "basic/default/ailayer/ailayer_dense_default.h"
+#include "basic/default/ailayer/ailayer_conv1d_default.h"
+#include "basic/default/ailayer/ailayer_conv2d_default.h"
#include "basic/default/ailayer/ailayer_input_default.h"
#include "basic/default/ailayer/ailayer_relu_default.h"
#include "basic/default/ailayer/ailayer_leaky_relu_default.h"
diff --git a/src/basic/base/ailayer/ailayer_conv1d.c b/src/basic/base/ailayer/ailayer_conv1d.c
new file mode 100644
index 0000000..d361422
--- /dev/null
+++ b/src/basic/base/ailayer/ailayer_conv1d.c
@@ -0,0 +1,263 @@
+/**
+ * \file basic/base/ailayer/ailayer_conv1d.c
+ * \version 2.0alpha
+ * \date 27.05.2024
+ * \copyright Copyright (C) 2020-2024 Fraunhofer Institute for Microelectronic Circuits and Systems.
+ All rights reserved.
+
+ AIfES is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "basic/base/ailayer/ailayer_conv1d.h"
+#include "basic/base/aimath/aimath_basic.h"
+
+const aicore_layertype_t ailayer_conv1d_type_s = { /* Layer type descriptor for Conv1D. */
+#ifdef AIDEBUG_PRINT_MODULE_SPECS
+ .name = "Conv1D", /* printable layer name, only compiled in with AIDEBUG_PRINT_MODULE_SPECS */
+ .print_specs = ailayer_conv1d_print_specs /* spec printer defined at the end of this file */
+#else
+ .name = 0, /* no name and no printer when spec printing is compiled out */
+ .print_specs = 0
+#endif
+};
+const aicore_layertype_t *ailayer_conv1d_type = &ailayer_conv1d_type_s; /* public handle; assigned to base.layer_type in ailayer_conv1d() */
+
+/**
+ * Initialize a Conv1D layer structure and connect it behind input_layer.
+ *
+ * Fills the base layer (result/deltas descriptors, function pointers,
+ * trainable parameter bookkeeping) and the weight/bias tensor descriptors
+ * from the configuration fields already set in *layer.
+ *
+ * @param layer        Conv1D layer structure with its configuration fields set.
+ * @param input_layer  Layer whose result is the input of this layer.
+ * @return Pointer to the embedded base layer, or 0 if the configuration is
+ *         rejected (groups == 0 always; the remaining checks only with DEBUG_CHECKS).
+ */
+ailayer_t *ailayer_conv1d(ailayer_conv1d_t *layer, ailayer_t *input_layer)
+{
+    uint8_t i;
+
+    /* Validate before any state is modified, so that a rejected layer does
+     * not leave input_layer->output_layer pointing at it. */
+#ifdef DEBUG_CHECKS
+    if(input_layer->result.dim != 3)
+    {
+        LOG_E("Conv1D: input tensor must have 3 dimensions.\n");
+        return 0;
+    }
+#endif
+    /* groups is used as a divisor below; this guard stays active without
+     * DEBUG_CHECKS because a division by zero is undefined behaviour. */
+    if(layer->groups == 0){
+#ifdef DEBUG_CHECKS
+        LOG_E("Conv1D: groups must be greater than zero.\n");
+#endif
+        return 0;
+    }
+#ifdef DEBUG_CHECKS
+    if(input_layer->result.shape[1] % layer->groups != 0){
+        LOG_E("Conv1D: input channels not divisible by groups.\n");
+        return 0;
+    }
+    if(layer->out_channels % layer->groups != 0){
+        LOG_E("Conv1D: output channels not divisible by groups.\n");
+        return 0;
+    }
+#endif
+
+    layer->requires_grad = 0x03; /* weights and bias enabled by default */
+    layer->base.layer_type = ailayer_conv1d_type;
+
+    /* link the layer into the chain */
+    layer->base.input_layer = input_layer;
+    input_layer->output_layer = &(layer->base);
+
+    layer->base.layer_configuration = layer;
+    layer->base.result.dtype = layer->result_dtype;
+    layer->base.result.dim = 3;
+    layer->base.result.shape = layer->result_shape;
+
+    /* deltas mirror the input tensor; at most 3 entries fit into deltas_shape */
+    layer->base.deltas.dtype = layer->result_dtype;
+    layer->base.deltas.dim = input_layer->result.dim;
+    layer->base.deltas.shape = layer->deltas_shape;
+    for(i = 0; i < input_layer->result.dim && i < 3; i++){
+        layer->deltas_shape[i] = input_layer->result.shape[i];
+    }
+
+    /* weights: (out_channels, in_channels / groups, kernel_size) */
+    layer->weights.dim = 3;
+    layer->weights.dtype = layer->weights_dtype;
+    layer->weights.shape = layer->weights_shape;
+    layer->weights.shape[0] = layer->out_channels;
+    layer->weights.shape[1] = input_layer->result.shape[1] / layer->groups;
+    layer->weights.shape[2] = layer->kernel_size;
+
+    /* bias: (out_channels) */
+    layer->bias.dim = 1;
+    layer->bias.dtype = layer->bias_dtype;
+    layer->bias.shape = layer->bias_shape;
+    layer->bias.shape[0] = layer->out_channels;
+
+    layer->base.forward = ailayer_conv1d_forward;
+    layer->base.backward = ailayer_conv1d_backward;
+    layer->base.backward_meProp = ailayer_conv1d_backward_meProp;
+
+    layer->base.calc_result_shape = ailayer_conv1d_calc_result_shape;
+    layer->base.sizeof_paramem = ailayer_conv1d_sizeof_paramem;
+    layer->base.set_paramem = ailayer_conv1d_set_paramem;
+    layer->base.sizeof_trainmem = ailayer_conv1d_sizeof_trainmem;
+    layer->base.set_trainmem = ailayer_conv1d_set_trainmem;
+
+    layer->base.get_result_bound = 0;
+
+    layer->base.trainable_params_count = 2;
+    layer->base.trainable_params = layer->trainable_params;
+    layer->base.gradients = layer->gradients;
+    layer->base.optimem = layer->optimem;
+
+    layer->trainable_params[0] = &(layer->weights);
+    layer->trainable_params[1] = &(layer->bias);
+
+    return &(layer->base);
+}
+
+/* Forward pass: run the configured conv function on the input layer's result
+ * and write the output into this layer's result tensor. */
+void ailayer_conv1d_forward(ailayer_t *self)
+{
+    ailayer_conv1d_t *cfg = (ailayer_conv1d_t *)(self->layer_configuration);
+
+    cfg->conv(&(self->input_layer->result), &(cfg->weights), &(cfg->bias),
+              cfg->stride, cfg->padding, cfg->dilation, cfg->groups,
+              &(self->result));
+}
+
+/* Backward pass: compute the weight and bias gradients selected by
+ * requires_grad, then the deltas for the input (always computed). */
+void ailayer_conv1d_backward(ailayer_t *self)
+{
+    ailayer_conv1d_t *cfg = (ailayer_conv1d_t *)(self->layer_configuration);
+    aitensor_t *incoming_deltas = &(self->output_layer->deltas);
+
+    /* bit 0 of requires_grad: weight gradient */
+    if((cfg->requires_grad & 0x01) != 0){
+        cfg->conv_weight_grad(incoming_deltas, &(self->input_layer->result),
+                              cfg->stride, cfg->padding, cfg->dilation, cfg->groups,
+                              cfg->gradients[0]);
+    }
+
+    /* bit 1 of requires_grad: bias gradient */
+    if((cfg->requires_grad & 0x02) != 0){
+        cfg->conv_bias_grad(incoming_deltas, cfg->gradients[1]);
+    }
+
+    /* deltas handed to the previous layer are written into self->deltas */
+    cfg->conv_input_grad(incoming_deltas, &(cfg->weights),
+                         cfg->stride, cfg->padding, cfg->dilation, cfg->groups,
+                         &(self->deltas));
+}
+
+/* meProp entry point: the sparsification parameters are ignored and the
+ * regular dense backward pass is executed instead. */
+void ailayer_conv1d_backward_meProp(ailayer_t *self, float maxBpr, float minBpr, float damping, int dense_counter)
+{
+    /* silence unused-parameter warnings */
+    (void)dense_counter;
+    (void)damping;
+    (void)minBpr;
+    (void)maxBpr;
+
+    ailayer_conv1d_backward(self);
+}
+
+void ailayer_conv1d_calc_result_shape(ailayer_t *self)
+{
+ ailayer_conv1d_t *layer = (ailayer_conv1d_t *)(self->layer_configuration);
+ aitensor_t *x_in = &(self->input_layer->result);
+
+ uint16_t batch = x_in->shape[0];
+ uint16_t in_length = x_in->shape[2];
+
+ int32_t numerator = (int32_t)in_length + 2 * (int32_t)layer->padding -
+ (int32_t)layer->dilation * ((int32_t)layer->kernel_size - 1) - 1;
+ uint16_t out_length = (uint16_t)(numerator / layer->stride + 1);
+
+ self->result.shape[0] = batch;
+ self->result.shape[1] = layer->out_channels;
+ self->result.shape[2] = out_length;
+
+ layer->deltas_shape[0] = batch;
+ layer->deltas_shape[1] = x_in->shape[1];
+ layer->deltas_shape[2] = in_length;
+}
+
+/**
+ * Number of bytes needed for the parameter memory: tensor_params and data of
+ * the weights followed by tensor_params and data of the bias.
+ *
+ * The element counts are widened to uint32_t before multiplying; the uint16_t
+ * operands would otherwise be multiplied in (possibly 16 bit) int and overflow.
+ */
+uint32_t ailayer_conv1d_sizeof_paramem(const ailayer_t *self)
+{
+    const ailayer_conv1d_t *layer = (const ailayer_conv1d_t *)(self->layer_configuration);
+    uint32_t memory = 0;
+    uint32_t weight_count = (uint32_t)layer->out_channels * (uint32_t)layer->weights.shape[1] *
+                            (uint32_t)layer->kernel_size;
+
+    /* weights: (out_channels, in_channels / groups, kernel_size) */
+    memory += layer->weights_dtype->tensor_params_size;
+    memory += weight_count * (uint32_t)aimath_sizeof_dtype(layer->weights_dtype);
+
+    /* bias: (out_channels) */
+    memory += layer->bias_dtype->tensor_params_size;
+    memory += (uint32_t)layer->out_channels * (uint32_t)aimath_sizeof_dtype(layer->bias_dtype);
+
+    return memory;
+}
+
+void ailayer_conv1d_set_paramem(ailayer_t *self, void *memory_ptr) /* Bind the weight and bias tensors to the parameter memory block at memory_ptr. */
+{
+ uint32_t address_counter = 0; /* byte offset of the next unused region inside memory_ptr */
+ ailayer_conv1d_t *layer = (ailayer_conv1d_t *)(self->layer_configuration);
+
+ layer->weights.tensor_params = memory_ptr + address_counter; /* region 1: weights tensor_params; NOTE(review): arithmetic on void* is a compiler extension - confirm toolchain */
+ address_counter += layer->weights_dtype->tensor_params_size;
+ layer->weights.dim = 3; /* the shape must be complete before aimath_sizeof_tensor_data() is called below */
+ layer->weights.dtype = layer->weights_dtype;
+ layer->weights.shape = layer->weights_shape;
+ layer->weights.shape[0] = layer->out_channels;
+ layer->weights.shape[1] = self->input_layer->result.shape[1] / layer->groups; /* in_channels per group; NOTE(review): no zero check on groups here */
+ layer->weights.shape[2] = layer->kernel_size;
+ layer->weights.data = memory_ptr + address_counter; /* region 2: weights data */
+ address_counter += aimath_sizeof_tensor_data(&(layer->weights));
+
+ layer->bias.tensor_params = memory_ptr + address_counter; /* region 3: bias tensor_params */
+ address_counter += layer->bias_dtype->tensor_params_size;
+ layer->bias.dim = 1;
+ layer->bias.dtype = layer->bias_dtype;
+ layer->bias.shape = layer->bias_shape;
+ layer->bias.shape[0] = layer->out_channels;
+ layer->bias.data = memory_ptr + address_counter; /* region 4: bias data (last region, offset not advanced further) */
+
+ layer->trainable_params[0] = &(layer->weights); /* same ordering as gradients[]: 0 = weights, 1 = bias */
+ layer->trainable_params[1] = &(layer->bias);
+}
+
+/* Bytes of training memory needed for the gradient tensors: one tensor the
+ * size of the weights plus one the size of the bias. */
+uint32_t ailayer_conv1d_sizeof_trainmem(const ailayer_t *self)
+{
+    const ailayer_conv1d_t *cfg = (const ailayer_conv1d_t *)(self->layer_configuration);
+    uint32_t weight_bytes = aimath_sizeof_tensor(&(cfg->weights));
+    uint32_t bias_bytes = aimath_sizeof_tensor(&(cfg->bias));
+
+    return weight_bytes + bias_bytes;
+}
+
+void ailayer_conv1d_set_trainmem(ailayer_t *self, void *memory_ptr) /* Place the weight and bias gradient tensors into the training memory block at memory_ptr. */
+{
+ uint32_t address_counter = 0; /* byte offset of the next unused region inside memory_ptr */
+ ailayer_conv1d_t *layer = (ailayer_conv1d_t *)(self->layer_configuration);
+
+ self->gradients[0] = memory_ptr + address_counter; /* weight gradient: aitensor_t header, then data, then tensor_params */
+ address_counter += sizeof(aitensor_t);
+ self->gradients[0]->dim = layer->weights.dim;
+ self->gradients[0]->dtype = layer->weights.dtype;
+ self->gradients[0]->shape = layer->weights.shape; /* shares the shape array of the weights, no copy is made */
+ self->gradients[0]->data = memory_ptr + address_counter;
+ address_counter += aimath_sizeof_tensor_data(self->gradients[0]);
+ self->gradients[0]->tensor_params = memory_ptr + address_counter;
+ address_counter += aimath_sizeof_tensor_params(self->gradients[0]);
+
+ self->gradients[1] = memory_ptr + address_counter; /* bias gradient: same three-part layout, directly after the weight gradient */
+ address_counter += sizeof(aitensor_t);
+ self->gradients[1]->dim = layer->bias.dim;
+ self->gradients[1]->dtype = layer->bias.dtype;
+ self->gradients[1]->shape = layer->bias.shape; /* shares the shape array of the bias */
+ self->gradients[1]->data = memory_ptr + address_counter;
+ address_counter += aimath_sizeof_tensor_data(self->gradients[1]);
+ self->gradients[1]->tensor_params = memory_ptr + address_counter;
+ address_counter += aimath_sizeof_tensor_params(self->gradients[1]); /* final offset is not used afterwards */
+}
+
+#ifdef AIDEBUG_PRINT_MODULE_SPECS
+/* Print out_channels, kernel_size and stride of the layer through the given
+ * printf-like function. */
+void ailayer_conv1d_print_specs(const ailayer_t *self, int (*print)(const char *format, ...))
+{
+    const ailayer_conv1d_t *cfg = (const ailayer_conv1d_t *)(self->layer_configuration);
+    /* widen to unsigned long so the values match the %lu conversions */
+    const unsigned long out_channels = (unsigned long)cfg->out_channels;
+    const unsigned long kernel_size = (unsigned long)cfg->kernel_size;
+    const unsigned long stride = (unsigned long)cfg->stride;
+
+    print("out_channels: %lu, kernel_size: %lu, stride: %lu", out_channels, kernel_size, stride);
+}
+#endif
diff --git a/src/basic/base/ailayer/ailayer_conv1d.h b/src/basic/base/ailayer/ailayer_conv1d.h
new file mode 100644
index 0000000..655f39e
--- /dev/null
+++ b/src/basic/base/ailayer/ailayer_conv1d.h
@@ -0,0 +1,108 @@
+/**
+ * \file basic/base/ailayer/ailayer_conv1d.h
+ * \internal
+ * \date 27.05.2024
+ * \endinternal
+ * \version 2.0alpha
+ * \copyright Copyright (C) 2020-2024 Fraunhofer Institute for Microelectronic Circuits and Systems.
+ All rights reserved.
+
+ AIfES is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef AILAYER_CONV1D
+#define AILAYER_CONV1D
+
+#include "core/aifes_core.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Forward declaration of the Conv1D layer structure. */
+typedef struct ailayer_conv1d ailayer_conv1d_t;
+
+/**
+ * @brief General Conv1D layer structure; the math function pointers at the end select the actual implementation.
+ */
+struct ailayer_conv1d {
+ ailayer_t base; /**< Inherited field members from general ailayer struct. */
+ const aimath_dtype_t *result_dtype; /**< Data type of the inference result values (also used for the deltas). */
+ const aimath_dtype_t *weights_dtype; /**< Data type of the weights. */
+ const aimath_dtype_t *bias_dtype; /**< Data type of the bias. */
+
+ /** @name Layer configuration */
+ ///@{
+ uint16_t out_channels; /**< Number of output channels. */
+ uint16_t kernel_size; /**< Size of the convolution kernel. */
+ uint16_t stride; /**< Stride of the convolution (used as divisor in the output length formula). */
+ uint16_t padding; /**< Padding applied to both sides of the input (counted twice in the output length formula). */
+ uint16_t dilation; /**< Kernel dilation factor. */
+ uint16_t groups; /**< Number of groups; in_channels is divided by it to get the weights channel dimension. */
+ ///@}
+
+ /** @name Trainable parameters, shape storage and training bookkeeping */
+ ///@{
+ aitensor_t weights; /**< Tensor containing the convolution kernels. */
+ aitensor_t bias; /**< Tensor containing the bias. */
+
+ uint16_t result_shape[3]; /**< Result tensor shape (batch, channels, length). */
+ uint16_t deltas_shape[3]; /**< Delta tensor shape (copied from the input tensor shape). */
+ uint16_t weights_shape[3]; /**< Weights tensor shape (out_channels, in_channels / groups, kernel_size). */
+ uint16_t bias_shape[1]; /**< Bias tensor shape (out_channels). */
+
+ uint8_t requires_grad; /**< Bit mask to control gradient calculation (bit0: weights, bit1: bias). */
+
+ aitensor_t *trainable_params[2]; /**< Pointers to trainable parameter tensors (0: weights, 1: bias). */
+ aitensor_t *gradients[2]; /**< Gradient tensors (same ordering as trainable_params). */
+ void *optimem[2]; /**< Memory used by the training optimizer. */
+ ///@}
+
+ /** @name Math functions */
+ ///@{
+ void (*conv)(const aitensor_t *input, const aitensor_t *weights, const aitensor_t *bias,
+ uint16_t stride, uint16_t padding, uint16_t dilation, uint16_t groups,
+ aitensor_t *output); /**< Forward convolution; called by ailayer_conv1d_forward(). */
+ void (*conv_input_grad)(const aitensor_t *delta_out, const aitensor_t *weights,
+ uint16_t stride, uint16_t padding, uint16_t dilation, uint16_t groups,
+ aitensor_t *delta_in); /**< Deltas w.r.t. the layer input; always called in the backward pass. */
+ void (*conv_weight_grad)(const aitensor_t *delta_out, const aitensor_t *input,
+ uint16_t stride, uint16_t padding, uint16_t dilation, uint16_t groups,
+ aitensor_t *d_weights); /**< Weight gradient; called when bit0 of requires_grad is set. */
+ void (*conv_bias_grad)(const aitensor_t *delta_out, aitensor_t *d_bias); /**< Bias gradient; called when bit1 of requires_grad is set. */
+ ///@}
+};
+
+/** @brief Conv1D layer type indicator. */
+extern const aicore_layertype_t *ailayer_conv1d_type;
+
+ailayer_t *ailayer_conv1d(ailayer_conv1d_t *layer, ailayer_t *input_layer); /**< Initialize the layer and connect it behind input_layer; returns the base layer, or 0 when a debug check fails. */
+void ailayer_conv1d_forward(ailayer_t *self); /**< Forward pass: calls the configured conv function on the input layer's result. */
+void ailayer_conv1d_backward(ailayer_t *self); /**< Backward pass: weight/bias gradients as selected by requires_grad, then the input deltas. */
+void ailayer_conv1d_backward_meProp(ailayer_t *self, float maxBpr, float minBpr, float damping, int dense_counter); /**< Ignores the meProp parameters and calls ailayer_conv1d_backward(). */
+void ailayer_conv1d_calc_result_shape(ailayer_t *self); /**< Compute the result shape from the input shape and refresh deltas_shape. */
+uint32_t ailayer_conv1d_sizeof_paramem(const ailayer_t *self); /**< Bytes needed for weights and bias (tensor_params plus data each). */
+void ailayer_conv1d_set_paramem(ailayer_t *self, void *memory_ptr); /**< Bind weights and bias to the memory block at memory_ptr. */
+uint32_t ailayer_conv1d_sizeof_trainmem(const ailayer_t *self); /**< Bytes needed for the weight and bias gradient tensors. */
+void ailayer_conv1d_set_trainmem(ailayer_t *self, void *memory_ptr); /**< Place the gradient tensors into the memory block at memory_ptr. */
+
+#ifdef AIDEBUG_PRINT_MODULE_SPECS
+void ailayer_conv1d_print_specs(const ailayer_t *self, int (*print)(const char *format, ...)); /**< Print out_channels, kernel_size and stride via the given print function. */
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // AILAYER_CONV1D
diff --git a/src/basic/base/ailayer/ailayer_conv2d.c b/src/basic/base/ailayer/ailayer_conv2d.c
new file mode 100644
index 0000000..9047aba
--- /dev/null
+++ b/src/basic/base/ailayer/ailayer_conv2d.c
@@ -0,0 +1,280 @@
+/**
+ * \file basic/base/ailayer/ailayer_conv2d.c
+ * \version 2.0alpha
+ * \date 27.05.2024
+ * \copyright Copyright (C) 2020-2024 Fraunhofer Institute for Microelectronic Circuits and Systems.
+ All rights reserved.
+
+ AIfES is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "basic/base/ailayer/ailayer_conv2d.h"
+#include "basic/base/aimath/aimath_basic.h"
+
+const aicore_layertype_t ailayer_conv2d_type_s = { /* Layer type descriptor for Conv2D. */
+#ifdef AIDEBUG_PRINT_MODULE_SPECS
+ .name = "Conv2D", /* printable layer name, only compiled in with AIDEBUG_PRINT_MODULE_SPECS */
+ .print_specs = ailayer_conv2d_print_specs /* spec printer defined at the end of this file */
+#else
+ .name = 0, /* no name and no printer when spec printing is compiled out */
+ .print_specs = 0
+#endif
+};
+const aicore_layertype_t *ailayer_conv2d_type = &ailayer_conv2d_type_s; /* public handle; assigned to base.layer_type in ailayer_conv2d() */
+
+/**
+ * Initialize a Conv2D layer structure and connect it behind input_layer.
+ *
+ * Fills the base layer (result/deltas descriptors, function pointers,
+ * trainable parameter bookkeeping) and the weight/bias tensor descriptors
+ * from the configuration fields already set in *layer.
+ *
+ * @param layer        Conv2D layer structure with its configuration fields set.
+ * @param input_layer  Layer whose result is the input of this layer.
+ * @return Pointer to the embedded base layer, or 0 if the configuration is
+ *         rejected (groups == 0 always; the remaining checks only with DEBUG_CHECKS).
+ */
+ailayer_t *ailayer_conv2d(ailayer_conv2d_t *layer, ailayer_t *input_layer)
+{
+    uint8_t i;
+
+    /* Validate before any state is modified, so that a rejected layer does
+     * not leave input_layer->output_layer pointing at it. */
+#ifdef DEBUG_CHECKS
+    if(input_layer->result.dim != 4)
+    {
+        LOG_E("Conv2D: input tensor must have 4 dimensions.\n");
+        return 0;
+    }
+#endif
+    /* groups is used as a divisor below; this guard stays active without
+     * DEBUG_CHECKS because a division by zero is undefined behaviour. */
+    if(layer->groups == 0){
+#ifdef DEBUG_CHECKS
+        LOG_E("Conv2D: groups must be greater than zero.\n");
+#endif
+        return 0;
+    }
+#ifdef DEBUG_CHECKS
+    if(input_layer->result.shape[1] % layer->groups != 0){
+        LOG_E("Conv2D: input channels not divisible by groups.\n");
+        return 0;
+    }
+    if(layer->out_channels % layer->groups != 0){
+        LOG_E("Conv2D: output channels not divisible by groups.\n");
+        return 0;
+    }
+#endif
+
+    layer->requires_grad = 0x03; /* weights and bias enabled by default */
+    layer->base.layer_type = ailayer_conv2d_type;
+
+    /* link the layer into the chain */
+    layer->base.input_layer = input_layer;
+    input_layer->output_layer = &(layer->base);
+
+    layer->base.layer_configuration = layer;
+    layer->base.result.dtype = layer->result_dtype;
+    layer->base.result.dim = 4;
+    layer->base.result.shape = layer->result_shape;
+
+    /* deltas mirror the input tensor; at most 4 entries are copied into deltas_shape */
+    layer->base.deltas.dtype = layer->result_dtype;
+    layer->base.deltas.dim = input_layer->result.dim;
+    layer->base.deltas.shape = layer->deltas_shape;
+    for(i = 0; i < input_layer->result.dim && i < 4; i++){
+        layer->deltas_shape[i] = input_layer->result.shape[i];
+    }
+
+    /* weights: (out_channels, in_channels / groups, kernel_height, kernel_width) */
+    layer->weights.dim = 4;
+    layer->weights.dtype = layer->weights_dtype;
+    layer->weights.shape = layer->weights_shape;
+    layer->weights.shape[0] = layer->out_channels;
+    layer->weights.shape[1] = input_layer->result.shape[1] / layer->groups;
+    layer->weights.shape[2] = layer->kernel_height;
+    layer->weights.shape[3] = layer->kernel_width;
+
+    /* bias: (out_channels) */
+    layer->bias.dim = 1;
+    layer->bias.dtype = layer->bias_dtype;
+    layer->bias.shape = layer->bias_shape;
+    layer->bias.shape[0] = layer->out_channels;
+
+    layer->base.forward = ailayer_conv2d_forward;
+    layer->base.backward = ailayer_conv2d_backward;
+    layer->base.backward_meProp = ailayer_conv2d_backward_meProp;
+
+    layer->base.calc_result_shape = ailayer_conv2d_calc_result_shape;
+    layer->base.sizeof_paramem = ailayer_conv2d_sizeof_paramem;
+    layer->base.set_paramem = ailayer_conv2d_set_paramem;
+    layer->base.sizeof_trainmem = ailayer_conv2d_sizeof_trainmem;
+    layer->base.set_trainmem = ailayer_conv2d_set_trainmem;
+
+    layer->base.get_result_bound = 0;
+
+    layer->base.trainable_params_count = 2;
+    layer->base.trainable_params = layer->trainable_params;
+    layer->base.gradients = layer->gradients;
+    layer->base.optimem = layer->optimem;
+
+    layer->trainable_params[0] = &(layer->weights);
+    layer->trainable_params[1] = &(layer->bias);
+
+    return &(layer->base);
+}
+
+/* Forward pass: run the configured conv function on the input layer's result
+ * and write the output into this layer's result tensor. */
+void ailayer_conv2d_forward(ailayer_t *self)
+{
+    ailayer_conv2d_t *cfg = (ailayer_conv2d_t *)(self->layer_configuration);
+
+    cfg->conv(&(self->input_layer->result), &(cfg->weights), &(cfg->bias),
+              cfg->stride_height, cfg->stride_width,
+              cfg->padding_height, cfg->padding_width,
+              cfg->dilation_height, cfg->dilation_width,
+              cfg->groups, &(self->result));
+}
+
+/* Backward pass: compute the weight and bias gradients selected by
+ * requires_grad, then the deltas for the input (always computed). */
+void ailayer_conv2d_backward(ailayer_t *self)
+{
+    ailayer_conv2d_t *cfg = (ailayer_conv2d_t *)(self->layer_configuration);
+    aitensor_t *incoming_deltas = &(self->output_layer->deltas);
+
+    /* bit 0 of requires_grad: weight gradient */
+    if((cfg->requires_grad & 0x01) != 0){
+        cfg->conv_weight_grad(incoming_deltas, &(self->input_layer->result),
+                              cfg->stride_height, cfg->stride_width,
+                              cfg->padding_height, cfg->padding_width,
+                              cfg->dilation_height, cfg->dilation_width,
+                              cfg->groups, cfg->gradients[0]);
+    }
+
+    /* bit 1 of requires_grad: bias gradient */
+    if((cfg->requires_grad & 0x02) != 0){
+        cfg->conv_bias_grad(incoming_deltas, cfg->gradients[1]);
+    }
+
+    /* deltas handed to the previous layer are written into self->deltas */
+    cfg->conv_input_grad(incoming_deltas, &(cfg->weights),
+                         cfg->stride_height, cfg->stride_width,
+                         cfg->padding_height, cfg->padding_width,
+                         cfg->dilation_height, cfg->dilation_width,
+                         cfg->groups, &(self->deltas));
+}
+
+/* meProp entry point: the sparsification parameters are ignored and the
+ * regular dense backward pass is executed instead. */
+void ailayer_conv2d_backward_meProp(ailayer_t *self, float maxBpr, float minBpr, float damping, int dense_counter)
+{
+    /* silence unused-parameter warnings */
+    (void)dense_counter;
+    (void)damping;
+    (void)minBpr;
+    (void)maxBpr;
+
+    ailayer_conv2d_backward(self);
+}
+
+void ailayer_conv2d_calc_result_shape(ailayer_t *self)
+{
+ ailayer_conv2d_t *layer = (ailayer_conv2d_t *)(self->layer_configuration);
+ aitensor_t *x_in = &(self->input_layer->result);
+
+ uint16_t batch = x_in->shape[0];
+ uint16_t in_height = x_in->shape[2];
+ uint16_t in_width = x_in->shape[3];
+
+ int32_t numerator_h = (int32_t)in_height + 2 * (int32_t)layer->padding_height -
+ (int32_t)layer->dilation_height * ((int32_t)layer->kernel_height - 1) - 1;
+ uint16_t out_height = (uint16_t)(numerator_h / layer->stride_height + 1);
+
+ int32_t numerator_w = (int32_t)in_width + 2 * (int32_t)layer->padding_width -
+ (int32_t)layer->dilation_width * ((int32_t)layer->kernel_width - 1) - 1;
+ uint16_t out_width = (uint16_t)(numerator_w / layer->stride_width + 1);
+
+ self->result.shape[0] = batch;
+ self->result.shape[1] = layer->out_channels;
+ self->result.shape[2] = out_height;
+ self->result.shape[3] = out_width;
+
+ layer->deltas_shape[0] = batch;
+ layer->deltas_shape[1] = x_in->shape[1];
+ layer->deltas_shape[2] = in_height;
+ layer->deltas_shape[3] = in_width;
+}
+
+/**
+ * Number of bytes needed for the parameter memory: tensor_params and data of
+ * the weights followed by tensor_params and data of the bias.
+ *
+ * The element counts are widened to uint32_t before multiplying; the uint16_t
+ * operands would otherwise be multiplied in (possibly 16 bit) int and overflow.
+ */
+uint32_t ailayer_conv2d_sizeof_paramem(const ailayer_t *self)
+{
+    const ailayer_conv2d_t *layer = (const ailayer_conv2d_t *)(self->layer_configuration);
+    uint32_t memory = 0;
+    uint32_t weight_count = (uint32_t)layer->out_channels * (uint32_t)layer->weights.shape[1] *
+                            (uint32_t)layer->kernel_height * (uint32_t)layer->kernel_width;
+
+    /* weights: (out_channels, in_channels / groups, kernel_height, kernel_width) */
+    memory += layer->weights_dtype->tensor_params_size;
+    memory += weight_count * (uint32_t)aimath_sizeof_dtype(layer->weights_dtype);
+
+    /* bias: (out_channels) */
+    memory += layer->bias_dtype->tensor_params_size;
+    memory += (uint32_t)layer->out_channels * (uint32_t)aimath_sizeof_dtype(layer->bias_dtype);
+
+    return memory;
+}
+
+void ailayer_conv2d_set_paramem(ailayer_t *self, void *memory_ptr) /* Bind the weight and bias tensors to the parameter memory block at memory_ptr. */
+{
+ uint32_t address_counter = 0; /* byte offset of the next unused region inside memory_ptr */
+ ailayer_conv2d_t *layer = (ailayer_conv2d_t *)(self->layer_configuration);
+
+ layer->weights.tensor_params = memory_ptr + address_counter; /* region 1: weights tensor_params; NOTE(review): arithmetic on void* is a compiler extension - confirm toolchain */
+ address_counter += layer->weights_dtype->tensor_params_size;
+ layer->weights.dim = 4; /* the shape must be complete before aimath_sizeof_tensor_data() is called below */
+ layer->weights.dtype = layer->weights_dtype;
+ layer->weights.shape = layer->weights_shape;
+ layer->weights.shape[0] = layer->out_channels;
+ layer->weights.shape[1] = self->input_layer->result.shape[1] / layer->groups; /* in_channels per group; NOTE(review): no zero check on groups here */
+ layer->weights.shape[2] = layer->kernel_height;
+ layer->weights.shape[3] = layer->kernel_width;
+ layer->weights.data = memory_ptr + address_counter; /* region 2: weights data */
+ address_counter += aimath_sizeof_tensor_data(&(layer->weights));
+
+ layer->bias.tensor_params = memory_ptr + address_counter; /* region 3: bias tensor_params */
+ address_counter += layer->bias_dtype->tensor_params_size;
+ layer->bias.dim = 1;
+ layer->bias.dtype = layer->bias_dtype;
+ layer->bias.shape = layer->bias_shape;
+ layer->bias.shape[0] = layer->out_channels;
+ layer->bias.data = memory_ptr + address_counter; /* region 4: bias data (last region, offset not advanced further) */
+
+ layer->trainable_params[0] = &(layer->weights); /* same ordering as gradients[]: 0 = weights, 1 = bias */
+ layer->trainable_params[1] = &(layer->bias);
+}
+
+/* Bytes of training memory needed for the gradient tensors: one tensor the
+ * size of the weights plus one the size of the bias. */
+uint32_t ailayer_conv2d_sizeof_trainmem(const ailayer_t *self)
+{
+    const ailayer_conv2d_t *cfg = (const ailayer_conv2d_t *)(self->layer_configuration);
+    uint32_t weight_bytes = aimath_sizeof_tensor(&(cfg->weights));
+    uint32_t bias_bytes = aimath_sizeof_tensor(&(cfg->bias));
+
+    return weight_bytes + bias_bytes;
+}
+
+void ailayer_conv2d_set_trainmem(ailayer_t *self, void *memory_ptr) /* Place the weight and bias gradient tensors into the training memory block at memory_ptr. */
+{
+ uint32_t address_counter = 0; /* byte offset of the next unused region inside memory_ptr */
+ ailayer_conv2d_t *layer = (ailayer_conv2d_t *)(self->layer_configuration);
+
+ self->gradients[0] = memory_ptr + address_counter; /* weight gradient: aitensor_t header, then data, then tensor_params */
+ address_counter += sizeof(aitensor_t);
+ self->gradients[0]->dim = layer->weights.dim;
+ self->gradients[0]->dtype = layer->weights.dtype;
+ self->gradients[0]->shape = layer->weights.shape; /* shares the shape array of the weights, no copy is made */
+ self->gradients[0]->data = memory_ptr + address_counter;
+ address_counter += aimath_sizeof_tensor_data(self->gradients[0]);
+ self->gradients[0]->tensor_params = memory_ptr + address_counter;
+ address_counter += aimath_sizeof_tensor_params(self->gradients[0]);
+
+ self->gradients[1] = memory_ptr + address_counter; /* bias gradient: same three-part layout, directly after the weight gradient */
+ address_counter += sizeof(aitensor_t);
+ self->gradients[1]->dim = layer->bias.dim;
+ self->gradients[1]->dtype = layer->bias.dtype;
+ self->gradients[1]->shape = layer->bias.shape; /* shares the shape array of the bias */
+ self->gradients[1]->data = memory_ptr + address_counter;
+ address_counter += aimath_sizeof_tensor_data(self->gradients[1]);
+ self->gradients[1]->tensor_params = memory_ptr + address_counter;
+ address_counter += aimath_sizeof_tensor_params(self->gradients[1]); /* final offset is not used afterwards */
+}
+
+#ifdef AIDEBUG_PRINT_MODULE_SPECS
+/* Print out_channels, the kernel size and the stride (each as height x width)
+ * of the layer through the given printf-like function. */
+void ailayer_conv2d_print_specs(const ailayer_t *self, int (*print)(const char *format, ...))
+{
+    const ailayer_conv2d_t *cfg = (const ailayer_conv2d_t *)(self->layer_configuration);
+    /* widen to unsigned long so the values match the %lu conversions */
+    const unsigned long out_channels = (unsigned long)cfg->out_channels;
+    const unsigned long kernel_h = (unsigned long)cfg->kernel_height;
+    const unsigned long kernel_w = (unsigned long)cfg->kernel_width;
+    const unsigned long stride_h = (unsigned long)cfg->stride_height;
+    const unsigned long stride_w = (unsigned long)cfg->stride_width;
+
+    print("out_channels: %lu, kernel: %lux%lu, stride: %lux%lu",
+          out_channels, kernel_h, kernel_w, stride_h, stride_w);
+}
+#endif
diff --git a/src/basic/base/ailayer/ailayer_conv2d.h b/src/basic/base/ailayer/ailayer_conv2d.h
new file mode 100644
index 0000000..947c18f
--- /dev/null
+++ b/src/basic/base/ailayer/ailayer_conv2d.h
@@ -0,0 +1,118 @@
+/**
+ * \file basic/base/ailayer/ailayer_conv2d.h
+ * \internal
+ * \date 27.05.2024
+ * \endinternal
+ * \version 2.0alpha
+ * \copyright Copyright (C) 2020-2024 Fraunhofer Institute for Microelectronic Circuits and Systems.
+ All rights reserved.
+
+ AIfES is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef AILAYER_CONV2D
+#define AILAYER_CONV2D
+
+#include "core/aifes_core.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Forward declaration of the Conv2D layer structure. */
+typedef struct ailayer_conv2d ailayer_conv2d_t;
+
+/**
+ * @brief General Conv2D layer structure.
+ *
+ * Bundles the layer configuration, the parameter tensors with their shape
+ * buffers, the training bookkeeping and the math function pointers.
+ * The data type fields and the four function pointers are not set by this
+ * header; ailayer_conv2d_f32_default() fills them with the F32 default
+ * implementations before calling ailayer_conv2d().
+ */
+struct ailayer_conv2d {
+ ailayer_t base; /**< Inherited field members from general ailayer struct. */
+ const aimath_dtype_t *result_dtype; /**< Data type of the inference result values. */
+ const aimath_dtype_t *weights_dtype; /**< Data type of the weights. */
+ const aimath_dtype_t *bias_dtype; /**< Data type of the bias. */
+
+ /** @name Layer configuration */
+ ///@{
+ uint16_t out_channels; /**< Number of output feature maps. */
+ uint16_t kernel_height; /**< Kernel height. */
+ uint16_t kernel_width; /**< Kernel width. */
+ uint16_t stride_height; /**< Stride along the height dimension. */
+ uint16_t stride_width; /**< Stride along the width dimension. */
+ uint16_t padding_height; /**< Padding applied along the height dimension. */
+ uint16_t padding_width; /**< Padding applied along the width dimension. */
+ uint16_t dilation_height; /**< Dilation along the height dimension. */
+ uint16_t dilation_width; /**< Dilation along the width dimension. */
+ uint16_t groups; /**< Number of blocked connections. The math functions divide the channel counts by this value, so it must not be 0. */
+ ///@}
+
+ /** @name Trainable parameters */
+ ///@{
+ aitensor_t weights; /**< Tensor containing the convolution kernels. */
+ aitensor_t bias; /**< Tensor containing the bias. */
+
+ // The members below are shape buffers and training bookkeeping rather than
+ // parameters themselves; they stay in this group to keep the layout as is.
+ uint16_t result_shape[4]; /**< Result tensor shape (batch, channels, height, width). */
+ uint16_t deltas_shape[4]; /**< Delta tensor shape. */
+ uint16_t weights_shape[4]; /**< Weight tensor shape (out_channels, in_channels/groups, kernel_h, kernel_w). */
+ uint16_t bias_shape[1]; /**< Bias tensor shape (out_channels). */
+
+ uint8_t requires_grad; /**< Bit mask to control gradient calculation (bit0: weights, bit1: bias). */
+
+ aitensor_t *trainable_params[2]; /**< Pointers to trainable parameter tensors. */
+ aitensor_t *gradients[2]; /**< Gradient tensors (same ordering as trainable_params). */
+ void *optimem[2]; /**< Memory used by the training optimizer. */
+ ///@}
+
+ /** @name Math functions */
+ ///@{
+ /** Forward pass: convolves input with weights (plus optional bias) into output. */
+ void (*conv)(const aitensor_t *input, const aitensor_t *weights, const aitensor_t *bias,
+ uint16_t stride_height, uint16_t stride_width,
+ uint16_t padding_height, uint16_t padding_width,
+ uint16_t dilation_height, uint16_t dilation_width,
+ uint16_t groups, aitensor_t *output);
+ /** Backward pass w.r.t. the input: computes delta_in from delta_out and weights. */
+ void (*conv_input_grad)(const aitensor_t *delta_out, const aitensor_t *weights,
+ uint16_t stride_height, uint16_t stride_width,
+ uint16_t padding_height, uint16_t padding_width,
+ uint16_t dilation_height, uint16_t dilation_width,
+ uint16_t groups, aitensor_t *delta_in);
+ /** Backward pass w.r.t. the weights: computes d_weights from delta_out and input. */
+ void (*conv_weight_grad)(const aitensor_t *delta_out, const aitensor_t *input,
+ uint16_t stride_height, uint16_t stride_width,
+ uint16_t padding_height, uint16_t padding_width,
+ uint16_t dilation_height, uint16_t dilation_width,
+ uint16_t groups, aitensor_t *d_weights);
+ /** Backward pass w.r.t. the bias: computes d_bias from delta_out. */
+ void (*conv_bias_grad)(const aitensor_t *delta_out, aitensor_t *d_bias);
+ ///@}
+};
+
+/** @brief Conv2D layer type indicator. */
+extern const aicore_layertype_t *ailayer_conv2d_type;
+
+/**
+ * @brief Initializes a Conv2D layer and connects it to its input layer.
+ *
+ * Called by ailayer_conv2d_f32_default() after the data types and math
+ * function pointers have been set.
+ * NOTE(review): the returned pointer is presumably the layer's base
+ * ailayer_t - confirm against the implementation.
+ */
+ailayer_t *ailayer_conv2d(ailayer_conv2d_t *layer, ailayer_t *input_layer);
+/** @brief Forward pass of the layer. NOTE(review): presumably dispatches to ailayer_conv2d_t.conv - confirm. */
+void ailayer_conv2d_forward(ailayer_t *self);
+/** @brief Backward pass of the layer. NOTE(review): presumably uses the conv_*_grad function pointers - confirm. */
+void ailayer_conv2d_backward(ailayer_t *self);
+/**
+ * @brief Backward pass variant taking sparsification parameters.
+ *
+ * NOTE(review): the meaning and valid ranges of maxBpr, minBpr, damping and
+ * dense_counter are not documented in this header - confirm against the
+ * implementation before relying on them.
+ */
+void ailayer_conv2d_backward_meProp(ailayer_t *self, float maxBpr, float minBpr, float damping, int dense_counter);
+/** @brief Calculates the shape of the result tensor. NOTE(review): target buffer is presumably result_shape - confirm. */
+void ailayer_conv2d_calc_result_shape(ailayer_t *self);
+/** @brief Returns the number of bytes needed for the parameter memory of the layer. */
+uint32_t ailayer_conv2d_sizeof_paramem(const ailayer_t *self);
+/** @brief Distributes the given parameter memory block to the layer's parameter tensors. */
+void ailayer_conv2d_set_paramem(ailayer_t *self, void *memory_ptr);
+/** @brief Returns the number of bytes needed for the training memory of the layer. */
+uint32_t ailayer_conv2d_sizeof_trainmem(const ailayer_t *self);
+/**
+ * @brief Places the weights gradient (index 0) and the bias gradient (index 1)
+ * into the given training memory block (tensor header, data, tensor params each).
+ */
+void ailayer_conv2d_set_trainmem(ailayer_t *self, void *memory_ptr);
+
+#ifdef AIDEBUG_PRINT_MODULE_SPECS
+/** @brief Prints out_channels, kernel size and stride through the given printf-like callback. */
+void ailayer_conv2d_print_specs(const ailayer_t *self, int (*print)(const char *format, ...));
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // AILAYER_CONV2D
diff --git a/src/basic/default/ailayer/ailayer_conv1d_default.c b/src/basic/default/ailayer/ailayer_conv1d_default.c
new file mode 100644
index 0000000..4292e9e
--- /dev/null
+++ b/src/basic/default/ailayer/ailayer_conv1d_default.c
@@ -0,0 +1,36 @@
+/**
+ * \file basic/default/ailayer/ailayer_conv1d_default.c
+ * \version 2.0alpha
+ * \date 27.05.2024
+ * \copyright Copyright (C) 2020-2024 Fraunhofer Institute for Microelectronic Circuits and Systems.
+ All rights reserved.
+
+ AIfES is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "basic/default/ailayer/ailayer_conv1d_default.h"
+
+/**
+ * Configures a Conv1D layer for the F32 default implementation and hands it
+ * over to the general Conv1D initializer.
+ *
+ * All three data types become aif32 and the math function pointers are set
+ * to the aimath_f32_default_* convolution functions.
+ */
+ailayer_t *ailayer_conv1d_f32_default(ailayer_conv1d_f32_t *layer, ailayer_t *input_layer)
+{
+    // Math kernels (F32 default implementations)
+    layer->conv_bias_grad = aimath_f32_default_conv_bias_grad;
+    layer->conv_weight_grad = aimath_f32_default_conv1d_weight_grad;
+    layer->conv_input_grad = aimath_f32_default_conv1d_input_grad;
+    layer->conv = aimath_f32_default_conv1d_forward;
+
+    // Result, weights and bias all use F32
+    layer->bias_dtype = aif32;
+    layer->weights_dtype = aif32;
+    layer->result_dtype = aif32;
+
+    ailayer_t *configured = ailayer_conv1d(layer, input_layer);
+    return configured;
+}
diff --git a/src/basic/default/ailayer/ailayer_conv1d_default.h b/src/basic/default/ailayer/ailayer_conv1d_default.h
new file mode 100644
index 0000000..7e73b33
--- /dev/null
+++ b/src/basic/default/ailayer/ailayer_conv1d_default.h
@@ -0,0 +1,42 @@
+/**
+ * \file basic/default/ailayer/ailayer_conv1d_default.h
+ * \internal
+ * \date 27.05.2024
+ * \endinternal
+ * \version 2.0alpha
+ * \copyright Copyright (C) 2020-2024 Fraunhofer Institute for Microelectronic Circuits and Systems.
+ All rights reserved.
+
+ AIfES is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef AILAYER_CONV1D_DEFAULT
+#define AILAYER_CONV1D_DEFAULT
+
+#include "basic/base/ailayer/ailayer_conv1d.h"
+#include "basic/default/aimath/aimath_f32_default.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @brief F32 Conv1D layer type; an alias of the general Conv1D layer structure. */
+typedef struct ailayer_conv1d ailayer_conv1d_f32_t;
+
+/**
+ * @brief Initializes a Conv1D layer with the F32 default implementation.
+ *
+ * Sets result, weights and bias data type to aif32, assigns the
+ * aimath_f32_default_* convolution functions and then calls ailayer_conv1d().
+ *
+ * @param layer        Layer structure to set up.
+ * @param input_layer  Layer that is passed on to ailayer_conv1d() as input.
+ * @return             The value returned by ailayer_conv1d().
+ */
+ailayer_t *ailayer_conv1d_f32_default(ailayer_conv1d_f32_t *layer, ailayer_t *input_layer);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // AILAYER_CONV1D_DEFAULT
diff --git a/src/basic/default/ailayer/ailayer_conv2d_default.c b/src/basic/default/ailayer/ailayer_conv2d_default.c
new file mode 100644
index 0000000..7cc734a
--- /dev/null
+++ b/src/basic/default/ailayer/ailayer_conv2d_default.c
@@ -0,0 +1,36 @@
+/**
+ * \file basic/default/ailayer/ailayer_conv2d_default.c
+ * \version 2.0alpha
+ * \date 27.05.2024
+ * \copyright Copyright (C) 2020-2024 Fraunhofer Institute for Microelectronic Circuits and Systems.
+ All rights reserved.
+
+ AIfES is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "basic/default/ailayer/ailayer_conv2d_default.h"
+
+/**
+ * Configures a Conv2D layer for the F32 default implementation and hands it
+ * over to the general Conv2D initializer.
+ *
+ * All three data types become aif32 and the math function pointers are set
+ * to the aimath_f32_default_* convolution functions.
+ */
+ailayer_t *ailayer_conv2d_f32_default(ailayer_conv2d_f32_t *layer, ailayer_t *input_layer)
+{
+    // Math kernels (F32 default implementations)
+    layer->conv_bias_grad = aimath_f32_default_conv_bias_grad;
+    layer->conv_weight_grad = aimath_f32_default_conv2d_weight_grad;
+    layer->conv_input_grad = aimath_f32_default_conv2d_input_grad;
+    layer->conv = aimath_f32_default_conv2d_forward;
+
+    // Result, weights and bias all use F32
+    layer->bias_dtype = aif32;
+    layer->weights_dtype = aif32;
+    layer->result_dtype = aif32;
+
+    ailayer_t *configured = ailayer_conv2d(layer, input_layer);
+    return configured;
+}
diff --git a/src/basic/default/ailayer/ailayer_conv2d_default.h b/src/basic/default/ailayer/ailayer_conv2d_default.h
new file mode 100644
index 0000000..c105406
--- /dev/null
+++ b/src/basic/default/ailayer/ailayer_conv2d_default.h
@@ -0,0 +1,42 @@
+/**
+ * \file basic/default/ailayer/ailayer_conv2d_default.h
+ * \internal
+ * \date 27.05.2024
+ * \endinternal
+ * \version 2.0alpha
+ * \copyright Copyright (C) 2020-2024 Fraunhofer Institute for Microelectronic Circuits and Systems.
+ All rights reserved.
+
+ AIfES is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef AILAYER_CONV2D_DEFAULT
+#define AILAYER_CONV2D_DEFAULT
+
+#include "basic/base/ailayer/ailayer_conv2d.h"
+#include "basic/default/aimath/aimath_f32_default.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @brief F32 Conv2D layer type; an alias of the general Conv2D layer structure. */
+typedef struct ailayer_conv2d ailayer_conv2d_f32_t;
+
+/**
+ * @brief Initializes a Conv2D layer with the F32 default implementation.
+ *
+ * Sets result, weights and bias data type to aif32, assigns the
+ * aimath_f32_default_* convolution functions and then calls ailayer_conv2d().
+ *
+ * @param layer        Layer structure to set up.
+ * @param input_layer  Layer that is passed on to ailayer_conv2d() as input.
+ * @return             The value returned by ailayer_conv2d().
+ */
+ailayer_t *ailayer_conv2d_f32_default(ailayer_conv2d_f32_t *layer, ailayer_t *input_layer);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // AILAYER_CONV2D_DEFAULT
diff --git a/src/basic/default/aimath/aimath_f32_default.c b/src/basic/default/aimath/aimath_f32_default.c
index 8103e56..48099a3 100644
--- a/src/basic/default/aimath/aimath_f32_default.c
+++ b/src/basic/default/aimath/aimath_f32_default.c
@@ -24,6 +24,7 @@
#include "basic/default/aimath/aimath_f32_default.h"
#include
+#include
void aimath_f32_default_linear(const aitensor_t *a, const aitensor_t *b, const aitensor_t *c, aitensor_t *result)
@@ -662,19 +663,421 @@ void aimath_f32_default_init_glorot_uniform(aitensor_t *tensor)
*/
void aimath_f32_default_init_he_uniform(aitensor_t *tensor)
{
- float fan_in, fan_avg;
- if(tensor->dim == 2)
- {
- fan_in = tensor->shape[0];
- }
- else if(tensor->dim == 4)
- {
- fan_in = tensor->shape[1] * tensor->shape[2] * tensor->shape[3]; // In channel * kernel_elems
- }
+    float fan_in, fan_avg;
+    if(tensor->dim > 2)
+    {
+        // Convolution kernels (dim 3 for Conv1D, dim 4 for Conv2D):
+        // fan_in = in channels per group * kernel_elems.
+        // Handling every dim > 2 here keeps fan_in from being used
+        // uninitialized for the 3-dimensional Conv1D weights.
+        uint32_t receptive_field = tensor->shape[1];
+        uint8_t d;
+        for(d = 2; d < tensor->dim; d++)
+        {
+            receptive_field *= tensor->shape[d];
+        }
+        fan_in = (float) receptive_field;
+    }
+    else
+    {
+        // Dense weights (dim 2) keep the previous behavior; lower-dimensional
+        // tensors fall back to their first shape entry instead of reading
+        // an uninitialized value.
+        fan_in = tensor->shape[0];
+    }
- fan_avg = fan_in / 2.0f;
- float r = sqrt(3.0f / fan_avg);
- aimath_f32_default_tensor_init_uniform(tensor, -r, r);
+    // Uniform limit r = sqrt(3 / (fan_in / 2)) = sqrt(6 / fan_in)
+    fan_avg = fan_in / 2.0f;
+    float r = sqrt(3.0f / fan_avg);
+    aimath_f32_default_tensor_init_uniform(tensor, -r, r);
+}
+
+/**
+ * Bias gradient of a convolution layer (F32 default implementation).
+ *
+ * For every channel, sums delta_out over the batch and all spatial
+ * dimensions (everything from shape[2] onwards) and adds the sum to the
+ * existing content of d_bias. d_bias is not cleared here.
+ *
+ * delta_out is addressed as a contiguous (batch, channels, spatial...) array.
+ */
+void aimath_f32_default_conv_bias_grad(const aitensor_t *delta_out, aitensor_t *d_bias)
+{
+    const float *deltas = (const float *) delta_out->data;
+    float *bias_grad = (float *) d_bias->data;
+
+    const uint16_t n_samples = delta_out->shape[0];
+    const uint16_t n_channels = delta_out->shape[1];
+
+    // Number of elements per (sample, channel) pair
+    uint32_t plane = 1;
+    uint8_t axis = 2;
+    while(axis < delta_out->dim)
+    {
+        plane *= delta_out->shape[axis];
+        axis++;
+    }
+
+    const uint32_t sample_stride = n_channels * plane;
+
+    uint16_t c, s;
+    uint32_t i;
+    for(c = 0; c < n_channels; c++)
+    {
+        float acc = 0.0f;
+        for(s = 0; s < n_samples; s++)
+        {
+            const float *cursor = deltas + (s * sample_stride + c * plane);
+            for(i = 0; i < plane; i++)
+            {
+                acc += cursor[i];
+            }
+        }
+        bias_grad[c] += acc;
+    }
+}
+
+/**
+ * @brief 1D convolution forward pass (F32 default implementation).
+ *
+ * The index arithmetic treats all buffers as contiguous row-major arrays:
+ * input (batch, in_channels, in_length), weights (out_channels,
+ * in_channels / groups, kernel), output (batch, out_channels, out_length).
+ * Input positions outside [0, in_length) are skipped, so they contribute
+ * nothing to the sum.
+ *
+ * @param input     Input tensor.
+ * @param weights   Kernel tensor.
+ * @param bias      Bias tensor; if the pointer or its data pointer is 0, no bias is added.
+ * @param stride    Step between neighbouring output positions, in input elements.
+ * @param padding   Offset subtracted from the computed input position.
+ * @param dilation  Spacing between kernel taps, in input elements.
+ * @param groups    Number of channel groups; used as divisor before any check, so it must not be 0.
+ * @param output    Result tensor; out_length is read from output->shape[2], it is not computed here.
+ */
+void aimath_f32_default_conv1d_forward(const aitensor_t *input, const aitensor_t *weights, const aitensor_t *bias,
+ uint16_t stride, uint16_t padding, uint16_t dilation, uint16_t groups,
+ aitensor_t *output)
+{
+ const float *input_data = (const float *) input->data;
+ const float *weight_data = (const float *) weights->data;
+ const float *bias_data = (bias != 0 && bias->data != 0) ? (const float *) bias->data : 0;
+ float *out_data = (float *) output->data;
+
+ uint16_t batch = input->shape[0];
+ uint16_t in_channels = input->shape[1];
+ uint16_t in_length = input->shape[2];
+ uint16_t out_channels = weights->shape[0];
+ uint16_t kernel = weights->shape[2];
+ uint16_t out_length = output->shape[2];
+ uint16_t channels_per_group = in_channels / groups;
+ uint16_t out_per_group = out_channels / groups;
+
+#ifdef SHAPE_CHECK
+ // Both channel counts have to be divisible by groups (checked via the
+ // truncated quotients computed above).
+ if(output->shape[1] != out_channels)
+ {
+ LOG_E("Conv1D forward: output channels mismatch.\n");
+ return;
+ }
+ if(channels_per_group * groups != in_channels)
+ {
+ LOG_E("Conv1D forward: invalid group configuration.\n");
+ return;
+ }
+ if(out_per_group * groups != out_channels)
+ {
+ LOG_E("Conv1D forward: invalid output group configuration.\n");
+ return;
+ }
+#endif
+
+ for(uint16_t n = 0; n < batch; n++)
+ {
+ for(uint16_t g = 0; g < groups; g++)
+ {
+ for(uint16_t oc = 0; oc < out_per_group; oc++)
+ {
+ // Channel index over all groups
+ uint16_t oc_global = g * out_per_group + oc;
+ for(uint16_t out_pos = 0; out_pos < out_length; out_pos++)
+ {
+ float sum = bias_data ? bias_data[oc_global] : 0.0f;
+ for(uint16_t ic = 0; ic < channels_per_group; ic++)
+ {
+ uint16_t ic_global = g * channels_per_group + ic;
+ for(uint16_t k = 0; k < kernel; k++)
+ {
+ // Signed arithmetic: the position can become negative inside the padded border
+ int32_t in_pos = (int32_t) out_pos * stride - (int32_t) padding + (int32_t) k * dilation;
+ if(in_pos < 0 || in_pos >= in_length)
+ {
+ continue;
+ }
+
+ uint32_t input_index = ((uint32_t) n * in_channels + ic_global) * in_length + (uint32_t) in_pos;
+ // Weights only hold the channels of one group, hence the group-local ic
+ uint32_t weight_index = ((uint32_t) oc_global * channels_per_group + ic) * kernel + k;
+ sum += input_data[input_index] * weight_data[weight_index];
+ }
+ }
+ uint32_t out_index = ((uint32_t) n * out_channels + oc_global) * out_length + out_pos;
+ out_data[out_index] = sum;
+ }
+ }
+ }
+ }
+}
+
+/**
+ * @brief Gradient of the 1D convolution with respect to its input (F32 default implementation).
+ *
+ * delta_in is cleared with memset first; afterwards every output delta is
+ * multiplied with the kernel weights and added to the input positions it was
+ * computed from. Uses the same index mapping as the Conv1D forward pass
+ * (positions outside [0, in_length) are skipped).
+ *
+ * @param delta_out  Deltas of the layer output, addressed as (batch, out_channels, out_length).
+ * @param weights    Kernel tensor, addressed as (out_channels, in_channels / groups, kernel).
+ * @param stride     Step between neighbouring output positions, in input elements.
+ * @param padding    Offset subtracted from the computed input position.
+ * @param dilation   Spacing between kernel taps, in input elements.
+ * @param groups     Number of channel groups; used as divisor, so it must not be 0.
+ * @param delta_in   Resulting input deltas (overwritten), addressed as (batch, in_channels, in_length).
+ */
+void aimath_f32_default_conv1d_input_grad(const aitensor_t *delta_out, const aitensor_t *weights,
+ uint16_t stride, uint16_t padding, uint16_t dilation, uint16_t groups,
+ aitensor_t *delta_in)
+{
+ float *grad_input = (float *) delta_in->data;
+ const float *delta_data = (const float *) delta_out->data;
+ const float *weight_data = (const float *) weights->data;
+
+ // Start from zero because the loops below only accumulate
+ memset(grad_input, 0, aimath_sizeof_tensor_data(delta_in));
+
+ uint16_t batch = delta_out->shape[0];
+ uint16_t out_channels = delta_out->shape[1];
+ uint16_t out_length = delta_out->shape[2];
+ uint16_t in_channels = delta_in->shape[1];
+ uint16_t in_length = delta_in->shape[2];
+ uint16_t kernel = weights->shape[2];
+ uint16_t channels_per_group = in_channels / groups;
+ uint16_t out_per_group = out_channels / groups;
+
+ for(uint16_t n = 0; n < batch; n++)
+ {
+ for(uint16_t g = 0; g < groups; g++)
+ {
+ for(uint16_t oc = 0; oc < out_per_group; oc++)
+ {
+ uint16_t oc_global = g * out_per_group + oc;
+ for(uint16_t out_pos = 0; out_pos < out_length; out_pos++)
+ {
+ // Delta of the current output element
+ float grad = delta_data[((uint32_t) n * out_channels + oc_global) * out_length + out_pos];
+ for(uint16_t ic = 0; ic < channels_per_group; ic++)
+ {
+ uint16_t ic_global = g * channels_per_group + ic;
+ for(uint16_t k = 0; k < kernel; k++)
+ {
+ int32_t in_pos = (int32_t) out_pos * stride - (int32_t) padding + (int32_t) k * dilation;
+ if(in_pos < 0 || in_pos >= in_length)
+ {
+ continue;
+ }
+
+ uint32_t input_index = ((uint32_t) n * in_channels + ic_global) * in_length + (uint32_t) in_pos;
+ uint32_t weight_index = ((uint32_t) oc_global * channels_per_group + ic) * kernel + k;
+ grad_input[input_index] += grad * weight_data[weight_index];
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+/**
+ * @brief Gradient of the 1D convolution with respect to its weights (F32 default implementation).
+ *
+ * For every kernel weight, the products of input value and output delta are
+ * summed over the batch and all output positions. The result is added to the
+ * existing content of d_weights; the tensor is not cleared here.
+ * Input positions outside [0, in_length) are skipped.
+ *
+ * @param delta_out  Deltas of the layer output, addressed as (batch, out_channels, out_length).
+ * @param input      Layer input, addressed as (batch, in_channels, in_length).
+ * @param stride     Step between neighbouring output positions, in input elements.
+ * @param padding    Offset subtracted from the computed input position.
+ * @param dilation   Spacing between kernel taps, in input elements.
+ * @param groups     Number of channel groups; used as divisor, so it must not be 0.
+ * @param d_weights  Weight gradients (accumulated), addressed as (out_channels, in_channels / groups, kernel).
+ */
+void aimath_f32_default_conv1d_weight_grad(const aitensor_t *delta_out, const aitensor_t *input,
+ uint16_t stride, uint16_t padding, uint16_t dilation, uint16_t groups,
+ aitensor_t *d_weights)
+{
+ const float *delta_data = (const float *) delta_out->data;
+ const float *input_data = (const float *) input->data;
+ float *grad_data = (float *) d_weights->data;
+
+ uint16_t batch = input->shape[0];
+ uint16_t in_channels = input->shape[1];
+ uint16_t in_length = input->shape[2];
+ uint16_t out_channels = delta_out->shape[1];
+ uint16_t out_length = delta_out->shape[2];
+ uint16_t kernel = d_weights->shape[2];
+ uint16_t channels_per_group = in_channels / groups;
+ uint16_t out_per_group = out_channels / groups;
+
+ // The weight loops are outermost, so each weight_index is computed once
+ // per kernel element and reused for all samples and output positions.
+ for(uint16_t g = 0; g < groups; g++)
+ {
+ for(uint16_t oc = 0; oc < out_per_group; oc++)
+ {
+ uint16_t oc_global = g * out_per_group + oc;
+ for(uint16_t ic = 0; ic < channels_per_group; ic++)
+ {
+ uint16_t ic_global = g * channels_per_group + ic;
+ for(uint16_t k = 0; k < kernel; k++)
+ {
+ uint32_t weight_index = ((uint32_t) oc_global * channels_per_group + ic) * kernel + k;
+ for(uint16_t n = 0; n < batch; n++)
+ {
+ for(uint16_t out_pos = 0; out_pos < out_length; out_pos++)
+ {
+ int32_t in_pos = (int32_t) out_pos * stride - (int32_t) padding + (int32_t) k * dilation;
+ if(in_pos < 0 || in_pos >= in_length)
+ {
+ continue;
+ }
+
+ uint32_t input_index = ((uint32_t) n * in_channels + ic_global) * in_length + (uint32_t) in_pos;
+ uint32_t delta_index = ((uint32_t) n * out_channels + oc_global) * out_length + out_pos;
+ grad_data[weight_index] += input_data[input_index] * delta_data[delta_index];
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+/**
+ * @brief 2D convolution forward pass (F32 default implementation).
+ *
+ * The index arithmetic treats all buffers as contiguous row-major arrays:
+ * input (batch, in_channels, in_height, in_width), weights (out_channels,
+ * in_channels / groups, kernel_h, kernel_w), output (batch, out_channels,
+ * out_height, out_width). Input positions outside the input plane are
+ * skipped, so they contribute nothing to the sum.
+ * Unlike the Conv1D forward pass, this function contains no SHAPE_CHECK block.
+ *
+ * @param input            Input tensor.
+ * @param weights          Kernel tensor.
+ * @param bias             Bias tensor; if the pointer or its data pointer is 0, no bias is added.
+ * @param stride_height    Step between neighbouring output rows, in input rows.
+ * @param stride_width     Step between neighbouring output columns, in input columns.
+ * @param padding_height   Offset subtracted from the computed input row.
+ * @param padding_width    Offset subtracted from the computed input column.
+ * @param dilation_height  Spacing between kernel rows, in input rows.
+ * @param dilation_width   Spacing between kernel columns, in input columns.
+ * @param groups           Number of channel groups; used as divisor, so it must not be 0.
+ * @param output           Result tensor; out_height / out_width are read from its shape, not computed here.
+ */
+void aimath_f32_default_conv2d_forward(const aitensor_t *input, const aitensor_t *weights, const aitensor_t *bias,
+ uint16_t stride_height, uint16_t stride_width,
+ uint16_t padding_height, uint16_t padding_width,
+ uint16_t dilation_height, uint16_t dilation_width,
+ uint16_t groups, aitensor_t *output)
+{
+ const float *input_data = (const float *) input->data;
+ const float *weight_data = (const float *) weights->data;
+ const float *bias_data = (bias != 0 && bias->data != 0) ? (const float *) bias->data : 0;
+ float *out_data = (float *) output->data;
+
+ uint16_t batch = input->shape[0];
+ uint16_t in_channels = input->shape[1];
+ uint16_t in_height = input->shape[2];
+ uint16_t in_width = input->shape[3];
+ uint16_t out_channels = weights->shape[0];
+ uint16_t kernel_h = weights->shape[2];
+ uint16_t kernel_w = weights->shape[3];
+ uint16_t out_height = output->shape[2];
+ uint16_t out_width = output->shape[3];
+ uint16_t channels_per_group = in_channels / groups;
+ uint16_t out_per_group = out_channels / groups;
+
+ for(uint16_t n = 0; n < batch; n++)
+ {
+ for(uint16_t g = 0; g < groups; g++)
+ {
+ for(uint16_t oc = 0; oc < out_per_group; oc++)
+ {
+ // Channel index over all groups
+ uint16_t oc_global = g * out_per_group + oc;
+ for(uint16_t oh = 0; oh < out_height; oh++)
+ {
+ for(uint16_t ow = 0; ow < out_width; ow++)
+ {
+ float sum = bias_data ? bias_data[oc_global] : 0.0f;
+ for(uint16_t ic = 0; ic < channels_per_group; ic++)
+ {
+ uint16_t ic_global = g * channels_per_group + ic;
+ for(uint16_t kh = 0; kh < kernel_h; kh++)
+ {
+ // Signed arithmetic: the row can become negative inside the padded border
+ int32_t ih = (int32_t) oh * stride_height - (int32_t) padding_height + (int32_t) kh * dilation_height;
+ if(ih < 0 || ih >= in_height)
+ {
+ // Whole kernel row lies outside the input
+ continue;
+ }
+ for(uint16_t kw = 0; kw < kernel_w; kw++)
+ {
+ int32_t iw = (int32_t) ow * stride_width - (int32_t) padding_width + (int32_t) kw * dilation_width;
+ if(iw < 0 || iw >= in_width)
+ {
+ continue;
+ }
+
+ uint32_t input_index = (((uint32_t) n * in_channels + ic_global) * in_height + (uint32_t) ih) * in_width + (uint32_t) iw;
+ // Weights only hold the channels of one group, hence the group-local ic
+ uint32_t weight_index = ((((uint32_t) oc_global * channels_per_group + ic) * kernel_h) + kh) * kernel_w + kw;
+ sum += input_data[input_index] * weight_data[weight_index];
+ }
+ }
+ }
+ uint32_t out_index = (((uint32_t) n * out_channels + oc_global) * out_height + oh) * out_width + ow;
+ out_data[out_index] = sum;
+ }
+ }
+ }
+ }
+ }
+}
+
+/**
+ * @brief Gradient of the 2D convolution with respect to its input (F32 default implementation).
+ *
+ * delta_in is cleared with memset first; afterwards every output delta is
+ * multiplied with the kernel weights and added to the input positions it was
+ * computed from. Uses the same index mapping as the Conv2D forward pass
+ * (positions outside the input plane are skipped).
+ *
+ * @param delta_out        Deltas of the layer output, addressed as (batch, out_channels, out_height, out_width).
+ * @param weights          Kernel tensor, addressed as (out_channels, in_channels / groups, kernel_h, kernel_w).
+ * @param stride_height    Step between neighbouring output rows, in input rows.
+ * @param stride_width     Step between neighbouring output columns, in input columns.
+ * @param padding_height   Offset subtracted from the computed input row.
+ * @param padding_width    Offset subtracted from the computed input column.
+ * @param dilation_height  Spacing between kernel rows, in input rows.
+ * @param dilation_width   Spacing between kernel columns, in input columns.
+ * @param groups           Number of channel groups; used as divisor, so it must not be 0.
+ * @param delta_in         Resulting input deltas (overwritten), addressed as (batch, in_channels, in_height, in_width).
+ */
+void aimath_f32_default_conv2d_input_grad(const aitensor_t *delta_out, const aitensor_t *weights,
+ uint16_t stride_height, uint16_t stride_width,
+ uint16_t padding_height, uint16_t padding_width,
+ uint16_t dilation_height, uint16_t dilation_width,
+ uint16_t groups, aitensor_t *delta_in)
+{
+ float *grad_input = (float *) delta_in->data;
+ const float *delta_data = (const float *) delta_out->data;
+ const float *weight_data = (const float *) weights->data;
+
+ // Start from zero because the loops below only accumulate
+ memset(grad_input, 0, aimath_sizeof_tensor_data(delta_in));
+
+ uint16_t batch = delta_out->shape[0];
+ uint16_t out_channels = delta_out->shape[1];
+ uint16_t out_height = delta_out->shape[2];
+ uint16_t out_width = delta_out->shape[3];
+ uint16_t in_channels = delta_in->shape[1];
+ uint16_t in_height = delta_in->shape[2];
+ uint16_t in_width = delta_in->shape[3];
+ uint16_t kernel_h = weights->shape[2];
+ uint16_t kernel_w = weights->shape[3];
+ uint16_t channels_per_group = in_channels / groups;
+ uint16_t out_per_group = out_channels / groups;
+
+ for(uint16_t n = 0; n < batch; n++)
+ {
+ for(uint16_t g = 0; g < groups; g++)
+ {
+ for(uint16_t oc = 0; oc < out_per_group; oc++)
+ {
+ uint16_t oc_global = g * out_per_group + oc;
+ for(uint16_t oh = 0; oh < out_height; oh++)
+ {
+ for(uint16_t ow = 0; ow < out_width; ow++)
+ {
+ // Delta of the current output element
+ float grad = delta_data[(((uint32_t) n * out_channels + oc_global) * out_height + oh) * out_width + ow];
+ for(uint16_t ic = 0; ic < channels_per_group; ic++)
+ {
+ uint16_t ic_global = g * channels_per_group + ic;
+ for(uint16_t kh = 0; kh < kernel_h; kh++)
+ {
+ int32_t ih = (int32_t) oh * stride_height - (int32_t) padding_height + (int32_t) kh * dilation_height;
+ if(ih < 0 || ih >= in_height)
+ {
+ continue;
+ }
+ for(uint16_t kw = 0; kw < kernel_w; kw++)
+ {
+ int32_t iw = (int32_t) ow * stride_width - (int32_t) padding_width + (int32_t) kw * dilation_width;
+ if(iw < 0 || iw >= in_width)
+ {
+ continue;
+ }
+
+ uint32_t input_index = (((uint32_t) n * in_channels + ic_global) * in_height + (uint32_t) ih) * in_width + (uint32_t) iw;
+ uint32_t weight_index = ((((uint32_t) oc_global * channels_per_group + ic) * kernel_h) + kh) * kernel_w + kw;
+ grad_input[input_index] += grad * weight_data[weight_index];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+/**
+ * @brief Gradient of the 2D convolution with respect to its weights (F32 default implementation).
+ *
+ * For every kernel weight, the products of input value and output delta are
+ * summed over the batch and all output positions. The result is added to the
+ * existing content of d_weights; the tensor is not cleared here.
+ * Input positions outside the input plane are skipped.
+ *
+ * @param delta_out        Deltas of the layer output, addressed as (batch, out_channels, out_height, out_width).
+ * @param input            Layer input, addressed as (batch, in_channels, in_height, in_width).
+ * @param stride_height    Step between neighbouring output rows, in input rows.
+ * @param stride_width     Step between neighbouring output columns, in input columns.
+ * @param padding_height   Offset subtracted from the computed input row.
+ * @param padding_width    Offset subtracted from the computed input column.
+ * @param dilation_height  Spacing between kernel rows, in input rows.
+ * @param dilation_width   Spacing between kernel columns, in input columns.
+ * @param groups           Number of channel groups; used as divisor, so it must not be 0.
+ * @param d_weights        Weight gradients (accumulated), addressed as (out_channels, in_channels / groups, kernel_h, kernel_w).
+ */
+void aimath_f32_default_conv2d_weight_grad(const aitensor_t *delta_out, const aitensor_t *input,
+ uint16_t stride_height, uint16_t stride_width,
+ uint16_t padding_height, uint16_t padding_width,
+ uint16_t dilation_height, uint16_t dilation_width,
+ uint16_t groups, aitensor_t *d_weights)
+{
+ const float *delta_data = (const float *) delta_out->data;
+ const float *input_data = (const float *) input->data;
+ float *grad_data = (float *) d_weights->data;
+
+ uint16_t batch = input->shape[0];
+ uint16_t in_channels = input->shape[1];
+ uint16_t in_height = input->shape[2];
+ uint16_t in_width = input->shape[3];
+ uint16_t out_channels = delta_out->shape[1];
+ uint16_t out_height = delta_out->shape[2];
+ uint16_t out_width = delta_out->shape[3];
+ uint16_t kernel_h = d_weights->shape[2];
+ uint16_t kernel_w = d_weights->shape[3];
+ uint16_t channels_per_group = in_channels / groups;
+ uint16_t out_per_group = out_channels / groups;
+
+ // The weight loops are outermost, so each weight_index is computed once
+ // per kernel element and reused for all samples and output positions.
+ for(uint16_t g = 0; g < groups; g++)
+ {
+ for(uint16_t oc = 0; oc < out_per_group; oc++)
+ {
+ uint16_t oc_global = g * out_per_group + oc;
+ for(uint16_t ic = 0; ic < channels_per_group; ic++)
+ {
+ uint16_t ic_global = g * channels_per_group + ic;
+ for(uint16_t kh = 0; kh < kernel_h; kh++)
+ {
+ for(uint16_t kw = 0; kw < kernel_w; kw++)
+ {
+ uint32_t weight_index = ((((uint32_t) oc_global * channels_per_group + ic) * kernel_h) + kh) * kernel_w + kw;
+ for(uint16_t n = 0; n < batch; n++)
+ {
+ for(uint16_t oh = 0; oh < out_height; oh++)
+ {
+ int32_t ih = (int32_t) oh * stride_height - (int32_t) padding_height + (int32_t) kh * dilation_height;
+ if(ih < 0 || ih >= in_height)
+ {
+ // Whole output row maps outside the input for this kernel row
+ continue;
+ }
+ for(uint16_t ow = 0; ow < out_width; ow++)
+ {
+ int32_t iw = (int32_t) ow * stride_width - (int32_t) padding_width + (int32_t) kw * dilation_width;
+ if(iw < 0 || iw >= in_width)
+ {
+ continue;
+ }
+
+ uint32_t input_index = (((uint32_t) n * in_channels + ic_global) * in_height + (uint32_t) ih) * in_width + (uint32_t) iw;
+ uint32_t delta_index = (((uint32_t) n * out_channels + oc_global) * out_height + oh) * out_width + ow;
+ grad_data[weight_index] += input_data[input_index] * delta_data[delta_index];
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
}
//Info(?): http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.9.4508&rep=rep1&type=pdf
diff --git a/src/basic/default/aimath/aimath_f32_default.h b/src/basic/default/aimath/aimath_f32_default.h
index 9569fc2..28953f8 100644
--- a/src/basic/default/aimath/aimath_f32_default.h
+++ b/src/basic/default/aimath/aimath_f32_default.h
@@ -1309,5 +1309,33 @@ void getAbsTopKIndices(uint16_t* topK, const uint16_t k, const aitensor_t* tenso
*/
void getAbsTopKIndices_minHeap(uint16_t* topK, const uint16_t k, const aitensor_t* tensor, uint16_t row);
+/** @brief 1D convolution forward pass in F32
+ *
+ * Buffers are addressed as input (batch, in_channels, in_length), weights
+ * (out_channels, in_channels / groups, kernel) and output (batch, out_channels,
+ * out_length). Positions outside the input are skipped. A bias pointer (or
+ * bias data pointer) of 0 disables the bias. groups must not be 0.
+ */
+void aimath_f32_default_conv1d_forward(const aitensor_t *input, const aitensor_t *weights, const aitensor_t *bias,
+ uint16_t stride, uint16_t padding, uint16_t dilation, uint16_t groups,
+ aitensor_t *output);
+/** @brief Gradient of the 1D convolution w.r.t. the input in F32
+ *
+ * delta_in is cleared and then filled with the accumulated contributions.
+ */
+void aimath_f32_default_conv1d_input_grad(const aitensor_t *delta_out, const aitensor_t *weights,
+ uint16_t stride, uint16_t padding, uint16_t dilation, uint16_t groups,
+ aitensor_t *delta_in);
+/** @brief Gradient of the 1D convolution w.r.t. the weights in F32
+ *
+ * The gradient is added to the existing content of d_weights (no clearing).
+ */
+void aimath_f32_default_conv1d_weight_grad(const aitensor_t *delta_out, const aitensor_t *input,
+ uint16_t stride, uint16_t padding, uint16_t dilation, uint16_t groups,
+ aitensor_t *d_weights);
+
+/** @brief 2D convolution forward pass in F32
+ *
+ * Buffers are addressed as input (batch, in_channels, in_height, in_width),
+ * weights (out_channels, in_channels / groups, kernel_h, kernel_w) and output
+ * (batch, out_channels, out_height, out_width). Positions outside the input
+ * are skipped. A bias pointer (or bias data pointer) of 0 disables the bias.
+ * groups must not be 0.
+ */
+void aimath_f32_default_conv2d_forward(const aitensor_t *input, const aitensor_t *weights, const aitensor_t *bias,
+ uint16_t stride_height, uint16_t stride_width,
+ uint16_t padding_height, uint16_t padding_width,
+ uint16_t dilation_height, uint16_t dilation_width,
+ uint16_t groups, aitensor_t *output);
+/** @brief Gradient of the 2D convolution w.r.t. the input in F32
+ *
+ * delta_in is cleared and then filled with the accumulated contributions.
+ */
+void aimath_f32_default_conv2d_input_grad(const aitensor_t *delta_out, const aitensor_t *weights,
+ uint16_t stride_height, uint16_t stride_width,
+ uint16_t padding_height, uint16_t padding_width,
+ uint16_t dilation_height, uint16_t dilation_width,
+ uint16_t groups, aitensor_t *delta_in);
+/** @brief Gradient of the 2D convolution w.r.t. the weights in F32
+ *
+ * The gradient is added to the existing content of d_weights (no clearing).
+ */
+void aimath_f32_default_conv2d_weight_grad(const aitensor_t *delta_out, const aitensor_t *input,
+ uint16_t stride_height, uint16_t stride_width,
+ uint16_t padding_height, uint16_t padding_width,
+ uint16_t dilation_height, uint16_t dilation_width,
+ uint16_t groups, aitensor_t *d_weights);
+
+/** @brief Bias gradient of a convolution layer in F32
+ *
+ * Sums delta_out per channel over the batch and all dimensions from index 2
+ * onwards and adds the sums to the existing content of d_bias (no clearing).
+ */
+void aimath_f32_default_conv_bias_grad(const aitensor_t *delta_out, aitensor_t *d_bias);
+
#endif // AIMATH_F32_DEFAULT