@@ -203,6 +203,13 @@ __device__ void ReduceSum(AFloat *result, AFloat * sdata)
203203 __syncthreads ();
204204}
205205
/// \brief Device-side maximum of two values of the same type.
///
/// Returns `y` only when `x < y` holds; in every other case (including equal
/// arguments, or an unordered comparison) `x` is returned.
template <typename AFloat>
__device__ AFloat max(AFloat x, AFloat y)
{
   return (x < y) ? y : x;
}
212+
206213// //////////////////////////////////////////////////////////////////////////////////
207214// / \brief Calculate the dimension of an output volume, given the sliding parameters
208215// / and the input shape.
@@ -901,6 +908,64 @@ __global__ void Downsample(AFloat * output, AFloat * indexMatrix, const AFloat *
901908
902909}
903910
///////////////////////////////////////////////////////////////////////////////////////////////////
/// \brief Back-propagate the gradients through a max-pooling layer.
///
/// Every thread owns exactly one element of the layer's input (one pixel of one depth slice).
/// It sums the incoming gradients of all receptive fields for which the index matrix records
/// this pixel as the winner, and writes the sum (zero if it never won) to the backward
/// gradients.
///
/// Launch layout: the x dimension indexes the imgHeight * imgWidth input pixels, the
/// y dimension indexes the depth slices. Threads outside that range return immediately.
///
/// Memory layout used by the indexing below: input element (row, col, slice) lives at
/// (col + row * imgWidth) * depth + slice; output element (row, col, slice) lives at
/// (row * width + col) * depth + slice, where height and width are the pooled dimensions.
/// Winning indices are compared against the flat pixel index col + row * imgWidth.
///
/// \param[out] activationGradientsBackward The gradients to be written. One gradient for each
///                                         neuron at the layer's input.
/// \param[in] activationGradients The gradients coming from the next layer. One gradient for
///                                each receptive field.
/// \param[in] indexMatrix Winning indices. One index for each receptive field.
/// \param[in] depth The depth of the input tensor.
/// \param[in] imgHeight The height of the input tensor.
/// \param[in] imgWidth The width of the input tensor.
/// \param[in] fltHeight Height of the filter.
/// \param[in] fltWidth Width of the filter.
/// \param[in] strideRows Stride size in the vertical dimension (between rows).
/// \param[in] strideCols Stride size in the horizontal dimension (between columns).
///////////////////////////////////////////////////////////////////////////////////////////////////
template <typename AFloat>
__global__ void MaxPoolBackward(AFloat *activationGradientsBackward,
                                const AFloat *activationGradients,
                                const AFloat *indexMatrix,
                                int depth, int imgHeight, int imgWidth, int fltHeight, int fltWidth,
                                int strideRows, int strideCols)
{
   int slice = blockDim.y * blockIdx.y + threadIdx.y; // depth slice handled by this thread.
   int j = blockDim.x * blockIdx.x + threadIdx.x;     // input pixel handled by this thread.

   if (slice >= depth || j >= imgHeight * imgWidth) return;

   // Dimensions of the pooled output. The third argument is presumably the zero padding
   // (calculateDimension is defined elsewhere in this file).
   int height = calculateDimension(imgHeight, fltHeight, 0, strideRows);
   int width = calculateDimension(imgWidth, fltWidth, 0, strideCols);

   // Which gradientsBackward element should this thread write to?
   int backRow = j % imgHeight;
   int backCol = j / imgHeight;
   int backPixel = backCol + backRow * imgWidth; // flat pixel index, as stored in indexMatrix.

   // Receptive field `r` covers the input rows [r * strideRows, r * strideRows + fltHeight - 1].
   // It therefore contains backRow exactly when
   //    ceil((backRow - fltHeight + 1) / strideRows) <= r <= floor(backRow / strideRows),
   // and analogously for columns. Both ends are computed in integer arithmetic and clamped to
   // the valid output range. Deriving the upper end from the lower one via
   // `fltHeight - strideRows` yields an empty range whenever the stride exceeds the filter
   // extent, which would silently zero every gradient.
   int rowReach = backRow - fltHeight + 1;
   int colReach = backCol - fltWidth + 1;
   int rowMin = (rowReach > 0) ? (rowReach + strideRows - 1) / strideRows : 0;
   int colMin = (colReach > 0) ? (colReach + strideCols - 1) / strideCols : 0;

   int rowMax = backRow / strideRows;
   int colMax = backCol / strideCols;
   if (rowMax > height - 1) rowMax = height - 1;
   if (colMax > width - 1) colMax = width - 1;

   AFloat grad = 0;

   // Iterate over all output elements that were the outcome of receptive fields I was part of.
   for (int row = rowMin; row <= rowMax; row++) {
      for (int col = colMin; col <= colMax; col++) {
         int outputIndex = (row * width + col) * depth + slice;

         // Was I the winning index within this receptive field?
         if (indexMatrix[outputIndex] == backPixel) {
            grad += activationGradients[outputIndex];
         }
      }
   }

   activationGradientsBackward[backPixel * depth + slice] = grad;
}
968+
904969} // namespace Cuda
905970} // namespace DNN
906971} // namespace TMVA
0 commit comments