|
15 | 15 | */ |
16 | 16 |
|
17 | 17 | #include "diamond.h" |
| 18 | +#define NUM_WORDS 16 |
| 19 | +extern "C" { |
18 | 20 |
|
19 | | -void diamond(data_t vecIn[N], data_t vecOut[N]) |
| 21 | +void diamond(vecOf16Words* vecIn, vecOf16Words* vecOut, int size) |
20 | 22 | { |
21 | | - data_t c1[N], c2[N], c3[N], c4[N]; |
22 | | -#pragma HLS dataflow |
23 | | - funcA(vecIn, c1, c2); |
24 | | - funcB(c1, c3); |
25 | | - funcC(c2, c4); |
26 | | - funcD(c3, c4, vecOut); |
| 23 | + hls::stream<vecOf16Words> c0, c1, c2, c3, c4, c5; |
| 24 | + assert(size % 16 == 0); |
| 25 | + |
| 26 | + #pragma HLS dataflow |
| 27 | + load(vecIn, c0, size); |
| 28 | + compute_A(c0, c1, c2, size); |
| 29 | + compute_B(c1, c3, size); |
| 30 | + compute_C(c2, c4, size); |
| 31 | + compute_D(c3, c4,c5, size); |
| 32 | + store(c5, vecOut, size); |
| 33 | +} |
| 34 | +} |
| 35 | + |
| 36 | +void load(vecOf16Words *in, hls::stream<vecOf16Words >& out, int size) |
| 37 | +{ |
| 38 | +Loop_Ld: |
| 39 | + for (int i = 0; i < size; i++) |
| 40 | + { |
| 41 | + #pragma HLS performance target_ti=32 |
| 42 | + #pragma HLS LOOP_TRIPCOUNT max=32 |
| 43 | + out.write(in[i]); |
| 44 | + } |
27 | 45 | } |
28 | 46 |
|
29 | | -void funcA(data_t *in, data_t *out1, data_t *out2) |
| 47 | +void compute_A(hls::stream<vecOf16Words >& in, hls::stream<vecOf16Words >& out1, hls::stream<vecOf16Words >& out2, int size) |
30 | 48 | { |
31 | | -Loop0: |
32 | | - for (int i = 0; i < N; i++) |
| 49 | +Loop_A: |
| 50 | + for (int i = 0; i < size; i++) |
33 | 51 | { |
34 | | -#pragma HLS pipeline II=1 rewind |
35 | | -//#pragma HLS pipeline rewind |
36 | | -//#pragma HLS unroll factor = 2 |
37 | | - data_t t = in[i] * 3; |
38 | | - out1[i] = t; |
39 | | - out2[i] = t; |
| 52 | + #pragma HLS performance target_ti=32 |
| 53 | + #pragma HLS LOOP_TRIPCOUNT max=32 |
| 54 | + vecOf16Words t = in.read(); |
| 55 | + out1.write(t * 3); |
| 56 | + out2.write(t * 3); |
40 | 57 | } |
41 | 58 | } |
42 | 59 |
|
43 | | -void funcB(data_t *in, data_t *out) |
| 60 | +void compute_B(hls::stream<vecOf16Words >& in, hls::stream<vecOf16Words >& out, int size) |
44 | 61 | { |
45 | | -Loop0: |
46 | | - for (int i = 0; i < N; i++) |
| 62 | +Loop_B: |
| 63 | + for (int i = 0; i < size; i++) |
47 | 64 | { |
48 | | -#pragma HLS pipeline II=1 rewind |
49 | | -//#pragma HLS pipeline rewind |
50 | | -//#pragma HLS unroll factor = 2 |
51 | | - out[i] = in[i] + 25; |
| 65 | + #pragma HLS performance target_ti=32 |
| 66 | + #pragma HLS LOOP_TRIPCOUNT max=32 |
| 67 | + out.write(in.read() + 25); |
52 | 68 | } |
53 | 69 | } |
54 | 70 |
|
55 | | -void funcC(data_t *in, data_t *out) |
| 71 | + |
| 72 | +void compute_C(hls::stream<vecOf16Words >& in, hls::stream<vecOf16Words >& out, int size) |
| 73 | +{ |
| 74 | +Loop_C: |
| 75 | + for (data_t i = 0; i < size; i++) |
| 76 | + { |
| 77 | + #pragma HLS performance target_ti=32 |
| 78 | + #pragma HLS LOOP_TRIPCOUNT max=32 |
| 79 | + out.write(in.read() * 2); |
| 80 | + } |
| 81 | +} |
| 82 | +void compute_D(hls::stream<vecOf16Words >& in1, hls::stream<vecOf16Words >& in2, hls::stream<vecOf16Words >& out, int size) |
56 | 83 | { |
57 | | -Loop0: |
58 | | - for (data_t i = 0; i < N; i++) |
| 84 | +Loop_D: |
| 85 | + for (data_t i = 0; i < size; i++) |
59 | 86 | { |
60 | | -#pragma HLS pipeline II=1 rewind |
61 | | -//#pragma HLS pipeline rewind |
62 | | -//#pragma HLS unroll factor = 2 |
63 | | - out[i] = in[i] * 2; |
| 87 | + #pragma HLS performance target_ti=32 |
| 88 | + #pragma HLS LOOP_TRIPCOUNT max=32 |
| 89 | + out.write(in1.read() + in2.read()); |
64 | 90 | } |
65 | 91 | } |
66 | 92 |
|
67 | | -void funcD(data_t *in1, data_t *in2, data_t *out) |
| 93 | +void store(hls::stream<vecOf16Words >& in, vecOf16Words *out, int size) |
68 | 94 | { |
69 | | -Loop0: |
70 | | - for (int i = 0; i < N; i++) |
| 95 | +Loop_St: |
| 96 | + for (int i = 0; i < size; i++) |
71 | 97 | { |
72 | | -#pragma HLS pipeline II=1 rewind |
73 | | -//#pragma HLS pipeline rewind |
74 | | -//#pragma HLS unroll factor = 2 |
75 | | - out[i] = in1[i] + in2[i] * 2; |
| 98 | + #pragma HLS performance target_ti=32 |
| 99 | + #pragma HLS LOOP_TRIPCOUNT max=32 |
| 100 | + out[i] = in.read(); |
76 | 101 | } |
77 | 102 | } |
0 commit comments