Skip to content

Commit 1373d1d

Browse files
committed
Merge GlobalAveragePool header into AvgPool header
1 parent 9767ca6 commit 1373d1d

4 files changed

Lines changed: 36 additions & 50 deletions

File tree

TargetLibraries/PULPOpen/inc/DeeployPULPKernels.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@
1515
#include "kernel/BatchNorm.h"
1616
#include "kernel/Conv.h"
1717
#include "kernel/GELU.h"
18-
#include "kernel/GlobalAveragePool.h"
1918
#include "kernel/Layernorm.h"
2019
#include "kernel/Matmul.h"
2120
#include "kernel/MaxPool.h"

TargetLibraries/PULPOpen/inc/DeeployPULPMath.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@
2727
#include "kernel/BatchNorm.h"
2828
#include "kernel/Conv.h"
2929
#include "kernel/GELU.h"
30-
#include "kernel/GlobalAveragePool.h"
3130
#include "kernel/Layernorm.h"
3231
#include "kernel/Matmul.h"
3332
#include "kernel/MaxPool.h"

TargetLibraries/PULPOpen/inc/kernel/AvgPool.h

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,4 +24,40 @@ void PULP_AvgPool2d_fp32_fp32_CHW(const float32_t *__restrict__ pSrcA,
2424
uint32_t pad_top, uint32_t pad_bottom,
2525
uint32_t pad_left, uint32_t pad_right);
2626

27+
/**
28+
* @brief Global Average Pooling forward pass (NCHW layout).
29+
*
30+
* For each (n, c), computes the mean over all (h, w) spatial positions:
31+
* output[n*C + c] = sum_{h,w}(input[(n*C+c)*H*W + h*W + w]) / (H*W)
32+
*
33+
* Parallelized over channels: each core handles a contiguous chunk of channels.
34+
*
35+
* @param input Input tensor [N, C, H, W] NCHW float32
36+
* @param output Output tensor [N, C, 1, 1] stored as [N*C] float32
37+
* @param N Batch size
38+
* @param C Number of channels
39+
* @param H Spatial height
40+
* @param W Spatial width
41+
*/
42+
void PULP_GlobalAveragePool_fp32(const float32_t *input, float32_t *output,
43+
uint32_t N, uint32_t C, uint32_t H, uint32_t W);
44+
45+
/**
46+
* @brief Global Average Pooling backward pass (NCHW layout).
47+
*
48+
* Distributes the upstream gradient evenly across all spatial positions:
49+
* dX[n,c,h,w] = dY[n*C + c] / (H*W)
50+
*
51+
* Parallelized over channels: each core handles a contiguous chunk of channels.
52+
*
53+
* @param dY Upstream gradient [N, C, 1, 1] stored as [N*C] float32
54+
* @param dX Gradient w.r.t. input [N, C, H, W] NCHW float32
55+
* @param N Batch size
56+
* @param C Number of channels
57+
* @param H Spatial height
58+
* @param W Spatial width
59+
*/
60+
void PULP_GlobalAveragePoolGrad_fp32(const float32_t *dY, float32_t *dX,
61+
uint32_t N, uint32_t C, uint32_t H, uint32_t W);
62+
2763
#endif // __DEEPLOY_MATH_AVGPOOL_KERNEL_HEADER_

TargetLibraries/PULPOpen/inc/kernel/GlobalAveragePool.h

Lines changed: 0 additions & 48 deletions
This file was deleted.

0 commit comments

Comments
 (0)