Skip to content

Commit 1d19087

Browse files
Merge pull request #7 from ramanan-radhakrishnan/master
2022.1 Update
2 parents 6ec6726 + dbfdc38 commit 1d19087

42 files changed

Lines changed: 1600 additions & 163 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Dataflow/Channels/Vitis/Makefile

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# Build and run flow for the "diamond" example: a v++ kernel build plus a g++
# host build.
#
# TARGET is passed to v++ as the -t value and defaults to hw. The run rule
# treats any value other than hw as an emulation mode and exports it through
# XCL_EMULATION_MODE.
TARGET := hw

# These names are commands, not files. "build" in particular has to be phony
# because the host recipe creates a directory with that same name.
.PHONY: build xclbin xo run clean cleanall

build: diamond.$(TARGET).xclbin host

# Compile the kernel source into a .xo object.
diamond.$(TARGET).xo: ../using_fifos/diamond.cpp
	v++ -c -g -t $(TARGET) -R 1 -k diamond \
		--save-temps \
		--temp_dir ./temp_dir \
		--report_dir ./report_dir \
		--log_dir ./log_dir \
		--config ./options.cfg \
		-I. \
		../using_fifos/diamond.cpp \
		-o ./diamond.$(TARGET).xo

# Link the .xo object into the .xclbin that the host program loads.
diamond.$(TARGET).xclbin: diamond.$(TARGET).xo
	v++ -l -g -t $(TARGET) -R 1 \
		--temp_dir ./temp_dir \
		--report_dir ./report_dir \
		--log_dir ./log_dir \
		--config ./options.cfg \
		-I. \
		diamond.$(TARGET).xo \
		-o diamond.$(TARGET).xclbin

# Host executable. types.h is listed so that changing it triggers a rebuild.
host: ./diamond_host.cpp types.h
	mkdir -p build/
	g++ -D__USE_XOPEN2K8 \
		-I$(XILINX_XRT)/include/ \
		-I. \
		-O3 -Wall -fmessage-length=0 -std=c++1y \
		./diamond_host.cpp \
		-L$(XILINX_XRT)/lib/ \
		-lxrt_coreutil -lpthread \
		-o ./host

# Short aliases for the TARGET-specific artifacts.
xclbin: diamond.$(TARGET).xclbin

xo: diamond.$(TARGET).xo

# Run the host against the xclbin built for TARGET. For non-hw targets an
# emulation configuration is generated first.
run: build
	@echo "Running $(TARGET) mode";
ifeq ($(TARGET), hw)
	@echo "************ Use Command Line to run application! ************"
	./host ./diamond.hw.xclbin ;
else
	emconfigutil --nd 1 --platform xilinx_u250_gen3x16_xdma_3_1_202020_1 --od ./
	XCL_EMULATION_MODE=$(TARGET) ./host ./diamond.$(TARGET).xclbin ;
endif

# Remove intermediate directories, logs and reports; built artifacts are kept.
clean:
	rm -rf temp_dir log_dir report_dir *log *.csv diamond.hw.ltx diamond.hw.xclbin.info emconfig.json

# Additionally remove the kernel objects, xclbin files and summaries.
cleanall: clean
	rm -rf diamond.*.xo diamond.*.xclbin *summary
57+

Dataflow/Channels/Vitis/README

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
This example shows how to use FIFOs instead of the default PIPOs as the channel type.
2+
3+
Files Included in this Package
4+
==============================
5+
diamond_host.cpp
6+
Makefile
7+
options.cfg
8+
types.h
9+
xrt.ini
10+
README
11+
12+
Building and Running the Application
13+
=========================================================
14+
make run TARGET=hw
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/**
2+
* Copyright (C) 2019-2021 Xilinx, Inc
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License"). You may
5+
* not use this file except in compliance with the License. A copy of the
6+
* License is located at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12+
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13+
* License for the specific language governing permissions and limitations
14+
* under the License.
15+
*/
16+
17+
#include <algorithm>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// XRT includes
#include "experimental/xrt_bo.h"
#include "experimental/xrt_device.h"
#include "experimental/xrt_kernel.h"

#include "types.h"
27+
28+
int main(int argc, char** argv) {
29+
unsigned int device_index = 0;
30+
std::string binaryFile = argv[1];
31+
32+
std::cout << "Open the device" << device_index << std::endl;
33+
auto device = xrt::device(device_index);
34+
std::cout << "Opened the device" << device_index << std::endl;
35+
36+
37+
auto uuid = device.load_xclbin(binaryFile);
38+
39+
size_t vector_size_bytes = sizeof(int) * totalNumWords;
40+
41+
auto krnl = xrt::kernel(device, uuid, "diamond");
42+
43+
std::cout << "Allocate Buffer in Global Memory\n";
44+
auto bufIn = xrt::bo(device, vector_size_bytes, krnl.group_id(0));
45+
auto bufOut = xrt::bo(device, vector_size_bytes, krnl.group_id(1));
46+
47+
// Map the contents of the buffer object into host memory
48+
auto bufIn_map = bufIn.map<int*>();
49+
auto bufOut_map = bufOut.map<int*>();
50+
std::fill(bufIn_map, bufIn_map + totalNumWords, 0);
51+
std::fill(bufOut_map, bufOut_map + totalNumWords, 0);
52+
53+
// Initialize the input data
54+
for (int i = 0; i < totalNumWords; i++)
55+
bufIn_map[i] = (uint32_t)i;
56+
std::cout << "The Test Data initialized" << std::endl;
57+
58+
59+
// Create the reference golden data for comparison
60+
int bufReference[totalNumWords];
61+
for (int i = 0; i < totalNumWords; ++i) {
62+
bufReference[i] = ((i*3)+25)+((i*3)*2);
63+
}
64+
std::cout << "The Test Data created" << std::endl;
65+
66+
67+
68+
std::cout << "Execution of the kernel\n";
69+
xrt::run run[3];
70+
71+
for (int i = 0; i < 3; i++) {
72+
std::cout << "synchronize input buffer data to device global memory\n";
73+
bufIn.sync(XCL_BO_SYNC_BO_TO_DEVICE);
74+
run[i] = krnl(bufIn,bufOut,totalNumWords/16);
75+
run[i].wait();
76+
std::cout << "Get the output data from the device" << std::endl;
77+
bufOut.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
78+
}
79+
80+
81+
#if debug
82+
for (int i = 0; i < totalNumWords; i++)
83+
{
84+
std::cout << "Referece " << bufReference[i] << std::endl;
85+
std::cout << "Out " << bufOut_map[i] << std::endl;
86+
}
87+
#endif
88+
89+
// Validate our results
90+
if (std::memcmp(bufOut_map, bufReference, totalNumWords))
91+
throw std::runtime_error("Value read back does not match reference");
92+
std::cout << "TEST PASSED\n";
93+
return 0;
94+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
platform=xilinx_u250_gen3x16_xdma_3_1_202020_1
2+
[profile]
3+
data=all:all:all
4+
stall=all:all:all

Dataflow/Channels/Vitis/types.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#include <vector>
2+
3+
#define totalNumWords 512
4+
// Each vector will be 64 bytes (16 x 4 bytes)
5+
// typedef std::vector<uint32_t, aligned_allocator<uint32_t>> vecOf16Words;
6+
//typedef std::vector<uint32_t, aligned_allocator<uint32_t>> vecTotalWords;

Dataflow/Channels/Vitis/xrt.ini

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
[Emulation]
2+
debug_mode=batch
3+
[Debug]
4+
native_xrt_trace=true
5+
device_trace=true

Dataflow/Channels/using_fifos/diamond.cpp

Lines changed: 62 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -15,63 +15,88 @@
1515
*/
1616

1717
#include "diamond.h"
18+
#define NUM_WORDS 16
19+
extern "C" {
1820

19-
void diamond(data_t vecIn[N], data_t vecOut[N])
21+
void diamond(vecOf16Words* vecIn, vecOf16Words* vecOut, int size)
2022
{
21-
data_t c1[N], c2[N], c3[N], c4[N];
22-
#pragma HLS dataflow
23-
funcA(vecIn, c1, c2);
24-
funcB(c1, c3);
25-
funcC(c2, c4);
26-
funcD(c3, c4, vecOut);
23+
hls::stream<vecOf16Words> c0, c1, c2, c3, c4, c5;
24+
assert(size % 16 == 0);
25+
26+
#pragma HLS dataflow
27+
load(vecIn, c0, size);
28+
compute_A(c0, c1, c2, size);
29+
compute_B(c1, c3, size);
30+
compute_C(c2, c4, size);
31+
compute_D(c3, c4,c5, size);
32+
store(c5, vecOut, size);
33+
}
34+
}
35+
36+
void load(vecOf16Words *in, hls::stream<vecOf16Words >& out, int size)
37+
{
38+
Loop_Ld:
39+
for (int i = 0; i < size; i++)
40+
{
41+
#pragma HLS performance target_ti=32
42+
#pragma HLS LOOP_TRIPCOUNT max=32
43+
out.write(in[i]);
44+
}
2745
}
2846

29-
void funcA(data_t *in, data_t *out1, data_t *out2)
47+
void compute_A(hls::stream<vecOf16Words >& in, hls::stream<vecOf16Words >& out1, hls::stream<vecOf16Words >& out2, int size)
3048
{
31-
Loop0:
32-
for (int i = 0; i < N; i++)
49+
Loop_A:
50+
for (int i = 0; i < size; i++)
3351
{
34-
#pragma HLS pipeline II=1 rewind
35-
//#pragma HLS pipeline rewind
36-
//#pragma HLS unroll factor = 2
37-
data_t t = in[i] * 3;
38-
out1[i] = t;
39-
out2[i] = t;
52+
#pragma HLS performance target_ti=32
53+
#pragma HLS LOOP_TRIPCOUNT max=32
54+
vecOf16Words t = in.read();
55+
out1.write(t * 3);
56+
out2.write(t * 3);
4057
}
4158
}
4259

43-
void funcB(data_t *in, data_t *out)
60+
void compute_B(hls::stream<vecOf16Words >& in, hls::stream<vecOf16Words >& out, int size)
4461
{
45-
Loop0:
46-
for (int i = 0; i < N; i++)
62+
Loop_B:
63+
for (int i = 0; i < size; i++)
4764
{
48-
#pragma HLS pipeline II=1 rewind
49-
//#pragma HLS pipeline rewind
50-
//#pragma HLS unroll factor = 2
51-
out[i] = in[i] + 25;
65+
#pragma HLS performance target_ti=32
66+
#pragma HLS LOOP_TRIPCOUNT max=32
67+
out.write(in.read() + 25);
5268
}
5369
}
5470

55-
void funcC(data_t *in, data_t *out)
71+
72+
// Reads `size` vectors from `in`, multiplies each one by 2 and writes the
// result to `out`.
void compute_C(hls::stream<vecOf16Words >& in, hls::stream<vecOf16Words >& out, int size)
{
    // The index is a signed int so the comparison with the signed `size`
    // bound is not an unsigned one, matching the other stages in this file.
    Loop_C:
    for (int i = 0; i < size; i++)
    {
#pragma HLS performance target_ti=32
#pragma HLS LOOP_TRIPCOUNT max=32
        out.write(in.read() * 2);
    }
}
82+
// Reads `size` vectors from each of `in1` and `in2`, adds them pairwise and
// writes each sum to `out`.
void compute_D(hls::stream<vecOf16Words >& in1, hls::stream<vecOf16Words >& in2, hls::stream<vecOf16Words >& out, int size)
{
    // The index is a signed int so the comparison with the signed `size`
    // bound is not an unsigned one, matching the other stages in this file.
    Loop_D:
    for (int i = 0; i < size; i++)
    {
#pragma HLS performance target_ti=32
#pragma HLS LOOP_TRIPCOUNT max=32
        out.write(in1.read() + in2.read());
    }
}
6692

67-
void funcD(data_t *in1, data_t *in2, data_t *out)
93+
void store(hls::stream<vecOf16Words >& in, vecOf16Words *out, int size)
6894
{
69-
Loop0:
70-
for (int i = 0; i < N; i++)
95+
Loop_St:
96+
for (int i = 0; i < size; i++)
7197
{
72-
#pragma HLS pipeline II=1 rewind
73-
//#pragma HLS pipeline rewind
74-
//#pragma HLS unroll factor = 2
75-
out[i] = in1[i] + in2[i] * 2;
98+
#pragma HLS performance target_ti=32
99+
#pragma HLS LOOP_TRIPCOUNT max=32
100+
out[i] = in.read();
76101
}
77102
}

Dataflow/Channels/using_fifos/diamond.h

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,30 @@
1414
* limitations under the License.
1515
*/
1616

17-
#define N 100
18-
typedef unsigned char data_t;
17+
typedef unsigned int data_t;
18+
#include <vector>
19+
#include<hls_vector.h>
20+
#include<hls_stream.h>
21+
#include <iostream>
1922

23+
#define NUM_WORDS 16
24+
25+
// Each vector will be 64 bytes (16 x 4 bytes)
26+
typedef hls::vector<uint32_t, NUM_WORDS> vecOf16Words;
27+
28+
29+
30+
extern "C" {
2031
// Top function
21-
void diamond(data_t vecIn[N], data_t vecOut[N]);
32+
void diamond(vecOf16Words *vecIn, vecOf16Words *vecOut, int size);
33+
}
2234

2335
// Sub functions
24-
void funcA(data_t f1In[N], data_t f1Out[N], data_t f1bisOut[N]);
25-
void funcB(data_t f2In[N], data_t f2Out[N]);
26-
void funcC(data_t f3In[N], data_t f3Out[N]);
27-
void funcD(data_t f4In[N], data_t f4bisIn[N], data_t f4Out[N]);
36+
37+
void load(vecOf16Words *in, hls::stream<vecOf16Words> & out, int vSize);
38+
void compute_A(hls::stream<vecOf16Words>& in, hls::stream<vecOf16Words >& out1, hls::stream<vecOf16Words >& out2, int vSize);
39+
void compute_B(hls::stream<vecOf16Words >& in, hls::stream<vecOf16Words >& out, int vSize);
40+
void compute_C(hls::stream<vecOf16Words >& in, hls::stream<vecOf16Words >& out, int vSize);
41+
void compute_D(hls::stream<vecOf16Words >& in1, hls::stream<vecOf16Words >& in2, hls::stream<vecOf16Words >& out, int vSize);
42+
void store(hls::stream<vecOf16Words >& in, vecOf16Words *out, int vSize);
43+

0 commit comments

Comments
 (0)