Skip to content

Commit 91c9e8e

Browse files
committed
Align HAL software
1 parent 3ab43c7 commit 91c9e8e

2 files changed

Lines changed: 12 additions & 6 deletions

File tree

neureka/hal/neureka_task.c

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -166,14 +166,16 @@ void neureka_task_set_strides(neureka_task_t *task, const uint32_t k_in,
166 166   .d2 = h_out_stride};
167 167   task->data.cfg.output_stride = output_stride;
168 168
169     - task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES;
170 169   if (task->kernel_shape == 1) { // 1x1
    170 + task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1;
171 171   task->data.cfg.weights_stride.d1 =
172     - NEUREKA_WEIGHT_BANDWIDTH_BYTES * num_k_in;
    172 + (NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 / 8) * task->qw * num_k_in;
173 173   } else if (!task->depthwise) { // 3x3
    174 + task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3;
174 175   task->data.cfg.weights_stride.d1 =
175     - NEUREKA_WEIGHT_BANDWIDTH_BYTES * task->qw * num_k_in;
    176 + NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3 * task->qw * num_k_in;
176 177   } else { // 3x3 depthwise
    178 + task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3;
177 179   task->data.cfg.weights_stride.d1 = 0;
178 180   }
179 181   task->data.cfg.weights_stride.d2 = 0;

neureka/hal/neureka_task_defs.h

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,9 @@
30 30   #define NNX_NEUREKA_PE_W (4)
31 31   #endif
32 32
   33 + #define NNX_NEUREKA_BANDWIDTH_1x1 (256)
   34 + #define NNX_NEUREKA_BANDWIDTH_3x3 (288)
   35 +
33 36   #define NEUREKA_SUBTILE_INPUT_HEIGHT_1x1 (NNX_NEUREKA_PE_H)
34 37   #define NEUREKA_SUBTILE_INPUT_WIDTH_1x1 (NNX_NEUREKA_PE_W)
35 38   #define NEUREKA_SUBTILE_INPUT_CHANNEL_1x1 (32)
@@ -38,12 +41,13 @@
38 41   #define NEUREKA_SUBTILE_INPUT_WIDTH_3x3 (NNX_NEUREKA_PE_W+2)
39 42   #define NEUREKA_SUBTILE_INPUT_CHANNEL_3x3 (32)
40 43
41    - #define NEUREKA_SUBTILE_OUTPUT_HEIGHT (4)
42    - #define NEUREKA_SUBTILE_OUTPUT_WIDTH (4)
   44 + #define NEUREKA_SUBTILE_OUTPUT_HEIGHT (NNX_NEUREKA_PE_H)
   45 + #define NEUREKA_SUBTILE_OUTPUT_WIDTH (NNX_NEUREKA_PE_W)
43 46   #define NEUREKA_SUBTILE_OUTPUT_CHANNEL (32)
44 47
45 48   #define NEUREKA_OUTPUT_BANDWIDTH_BYTES (32)
46    - #define NEUREKA_WEIGHT_BANDWIDTH_BYTES (32)
   49 + #define NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 (NNX_NEUREKA_BANDWIDTH_1x1/8)
   50 + #define NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3 (NNX_NEUREKA_BANDWIDTH_3x3/8)
47 51
48 52   #define NEUREKA_ECC_REGS_NUM (4)
49 53
0 commit comments

Comments
 (0)