Skip to content

Commit 90689e2

Browse files
committed
merge devel
1 parent 21294bb commit 90689e2

153 files changed

Lines changed: 15389 additions & 0 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# ----------------------------------------------------------------------
2+
#
3+
# File: FloatDWConvTemplate.py
4+
#
5+
# Last edited: 12.05.2025
6+
#
7+
# Copyright (C) 2025, ETH Zurich and University of Bologna.
8+
#
9+
# Author:
10+
# - Calin Diaconu, University of Bologna
11+
#
12+
# ----------------------------------------------------------------------
13+
# SPDX-License-Identifier: Apache-2.0
14+
#
15+
# Licensed under the Apache License, Version 2.0 (the License); you may
16+
# not use this file except in compliance with the License.
17+
# You may obtain a copy of the License at
18+
#
19+
# www.apache.org/licenses/LICENSE-2.0
20+
#
21+
# Unless required by applicable law or agreed to in writing, software
22+
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
23+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24+
# See the License for the specific language governing permissions and
25+
# limitations under the License.
26+
27+
from Deeploy.DeeployTypes import NodeTemplate
28+
29+
# Code-generation template for a 2D floating-point depth-wise convolution.
#
# The embedded <% ... %> block precomputes the number of elements in one batch
# of the input (ch_im_in * dim_im_in_x * dim_im_in_y) and of the output
# (ch_im_out * dim_im_out_x * dim_im_out_y).
#
# The emitted C code is wrapped in BEGIN_SINGLE_CORE / END_SINGLE_CORE. It takes
# local copies of the input and output pointers, loops ${batch} times, calls the
# DWConv2d_fp<in>_fp<weight>_fp<out>_NCHW kernel once per iteration (the widths
# come from the referenced types of data_in, weight and data_out), and then
# advances both local pointers by the per-batch offsets computed above.
reference2DTemplate = NodeTemplate("""
30+
<%
31+
batchOffsetIn = ch_im_in * dim_im_in_x * dim_im_in_y
32+
batchOffsetOut = ch_im_out * dim_im_out_x * dim_im_out_y
33+
%>
34+
// 2D FP Depth-wise Conv (Name: ${nodeName}, Op: ${nodeOp})
35+
BEGIN_SINGLE_CORE
36+
${data_in_type.typeName} ref_${data_out}_${data_in} = ${data_in};
37+
${data_out_type.typeName} ref_${data_out}_${data_out} = ${data_out};
38+
for (uint32_t n=0; n<${batch}; ++n) {
39+
DWConv2d_fp${data_in_type.referencedType.typeWidth}_fp${weight_type.referencedType.typeWidth}_fp${data_out_type.referencedType.typeWidth}_NCHW(
40+
ref_${data_out}_${data_in},
41+
${ch_im_in}, ${dim_im_in_x}, ${dim_im_in_y},
42+
${weight},
43+
${ch_im_out}, ${dim_kernel_x}, ${dim_kernel_y},
44+
${stride_x}, ${stride_y},
45+
${bias},
46+
${has_bias},
47+
ref_${data_out}_${data_out}
48+
);
49+
ref_${data_out}_${data_in} += ${batchOffsetIn};
50+
ref_${data_out}_${data_out} += ${batchOffsetOut};
51+
}
52+
END_SINGLE_CORE
53+
""")
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Build script for one Deeploy test executable, named after ${TESTNAME}.
set(ProjectId ${TESTNAME})
2+
3+
# Collect every file named main.c below this directory into SOURCES.
file(GLOB_RECURSE SOURCES
4+
main.c
5+
)
6+
7+
# Add the generated-sources directory to the linker search path.
link_directories(${ProjectId}/../../${GENERATED_SOURCE})
8+
9+
# add_deeploy_executable is not defined in this script; presumably a
# project-provided wrapper around add_executable -- TODO confirm.
add_deeploy_executable(${ProjectId} EXCLUDE_FROM_ALL ${SOURCES} )
10+
# Link against the network and deeploylib targets.
target_link_libraries(${ProjectId} PRIVATE network deeploylib)
11+
# RUN WANG: Link math Lib to Generic Target
12+
target_link_libraries(${ProjectId} PRIVATE m)
13+
14+
# add_gvsoc_emulation is not defined in this script; presumably it registers
# a GVSoC emulation run for the target -- TODO confirm.
add_gvsoc_emulation(${ProjectId})
15+
16+
# link_compile_dump is not defined in this script (project-provided).
link_compile_dump(${TESTNAME})
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
/*
2+
* ----------------------------------------------------------------------
3+
*
4+
* File: main.c
5+
*
6+
* Last edited: 26.05.2025
7+
*
8+
* Copyright (C) 2025, ETH Zurich and University of Bologna.
9+
*
10+
* Author: Bowen Wang (bowwang@iis.ee.ethz.ch), ETH Zurich
11+
*
12+
* ----------------------------------------------------------------------
13+
* SPDX-License-Identifier: Apache-2.0
14+
*
15+
* Licensed under the Apache License, Version 2.0 (the License); you may
16+
* not use this file except in compliance with the License.
17+
* You may obtain a copy of the License at
18+
*
19+
* www.apache.org/licenses/LICENSE-2.0
20+
*
21+
* Unless required by applicable law or agreed to in writing, software
22+
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
23+
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24+
* See the License for the specific language governing permissions and
25+
* limitations under the License.
26+
*/
27+
28+
#include <math.h>
29+
#include <stdint.h>
30+
#include <string.h>
31+
32+
#include "flex_cluster_arch.h"
33+
#include "flex_dma_pattern.h"
34+
#include "flex_printf.h"
35+
#include "flex_runtime.h"
36+
37+
// Deeploy-generated
38+
#include "Network.h"
39+
#include "testinputs.h"
40+
#include "testoutputs.h"
41+
42+
int main() {
43+
uint32_t eoc_val = 0;
44+
flex_barrier_xy_init();
45+
flex_global_barrier_xy();
46+
flex_alloc_init();
47+
flex_intra_cluster_sync();
48+
flex_global_barrier_xy();
49+
flex_intra_cluster_sync();
50+
/**************************************/
51+
/* Program Execution Region -- Start */
52+
/**************************************/
53+
uint32_t CID = flex_get_cluster_id(); // Get cluster ID
54+
uint32_t core_id = flex_get_core_id();
55+
56+
if (CID == 0) { // only allow cluster 0 to work
57+
if (flex_is_first_core()) {
58+
printf("[main.c] >>> Initializing network...\n\n");
59+
InitNetwork(core_id, ARCH_NUM_CORE_PER_CLUSTER);
60+
}
61+
62+
flex_intra_cluster_sync();
63+
64+
if (flex_is_dm_core()) { // allow dm core to init network and dma
65+
for (uint32_t buf = 0; buf < DeeployNetwork_num_inputs; buf++) {
66+
// original data in HBM (placed by loader)
67+
void *ori_addr = testInputVector[buf];
68+
69+
if ((uint64_t)DeeployNetwork_inputs[buf] <
70+
(uint64_t)ARCH_HBM_START_BASE) {
71+
// Trigger DMA transaction: move from HBM to L1
72+
uint64_t mask = 0x00000000ffffffff;
73+
uint64_t masked_addr = (uint64_t)ori_addr & mask;
74+
flex_dma_async_1d(DeeployNetwork_inputs[buf], masked_addr,
75+
DeeployNetwork_inputs_bytes[buf]);
76+
// Wait all DMA transaction done
77+
flex_dma_async_wait_all();
78+
} else {
79+
uint64_t *dst_addr = DeeployNetwork_inputs[buf];
80+
// perform mem_copy with a single core
81+
for (uint32_t i = 0; i < (DeeployNetwork_inputs_bytes[buf] + 7) / 8;
82+
i++) {
83+
uint64_t data = ((uint64_t *)ori_addr)[i];
84+
dst_addr[i] = data;
85+
}
86+
}
87+
}
88+
}
89+
flex_intra_cluster_sync(); // Cluster barrier
90+
91+
if (flex_is_first_core()) { // allow core 0 to compute
92+
printf("[main.c] >>> Running network...\n\n");
93+
}
94+
95+
RunNetwork(core_id, ARCH_NUM_CORE_PER_CLUSTER);
96+
97+
flex_intra_cluster_sync(); // Cluster barrier
98+
99+
// verification
100+
int32_t tot_err = 0;
101+
uint32_t tot = 0;
102+
OUTPUTTYPE diff;
103+
OUTPUTTYPE expected, actual;
104+
105+
if (flex_is_first_core()) {
106+
for (uint32_t buf = 0; buf < DeeployNetwork_num_outputs; buf++) {
107+
tot += DeeployNetwork_outputs_bytes[buf] / sizeof(OUTPUTTYPE);
108+
for (uint32_t i = 0;
109+
i < DeeployNetwork_outputs_bytes[buf] / sizeof(OUTPUTTYPE); i++) {
110+
expected = ((OUTPUTTYPE *)testOutputVector[buf])[i];
111+
actual = ((OUTPUTTYPE *)DeeployNetwork_outputs[buf])[i];
112+
diff = expected - actual;
113+
if (diff != 0) {
114+
tot_err += 1;
115+
printf("Expected: %4d ", expected);
116+
printf("Actual: %4d ", actual);
117+
printf("Diff: %4d at Index %12u in Output %u\r\n", diff, i, buf);
118+
}
119+
}
120+
}
121+
printf("Errors: %d out of %d \r\n", tot_err, tot);
122+
}
123+
flex_intra_cluster_sync(); // Cluster barrier
124+
}
125+
126+
/**************************************/
127+
/* Program Execution Region -- Stop */
128+
/**************************************/
129+
flex_global_barrier_xy();
130+
flex_eoc(eoc_val);
131+
return 0;
132+
}
12.3 KB
Binary file not shown.
1.48 MB
Binary file not shown.
306 Bytes
Binary file not shown.
96.3 KB
Binary file not shown.
1.48 MB
Binary file not shown.
586 Bytes
Binary file not shown.
12.3 KB
Binary file not shown.

0 commit comments

Comments
 (0)