Skip to content

Commit 859827d

Browse files
SDA USR (sdausr)
authored and committed
xf_compression: Next branch update to Vitis Libraries (#330)
* Squashed 'hpc/' changes from 1c6ac0e..f28aa9a f28aa9a update release notes e9f956a Merge branch 'dev2021.1' into next 04c17bc update release notes 366f577 update release notes 26599b6 Merge branch 'dev2021.1' into next 4e191d6 updates a40a413 update notes 01d565a Merge branch 'next' of gitenterprise.xilinx.com:FaaSApps/xf_hpc into next fd999c0 Merge branch 'dev2021.1' into next dbe158b fix version error bb0beb4 Merge pull request #77 from liangm/next 95b21eb merge dev2021.1 4ee28f5 Merge branch 'dev2021.1' of gitenterprise.xilinx.com:FaaSApps/xf_hpc into dev2021.1 a188c06 update makefiles 26fd0ea update release notes b7d6078 Update params.mk 550280c Update params.mk git-subtree-dir: hpc git-subtree-split: f28aa9aab61bf0cb761a7844986cad6a2320479f * Squashed 'data_compression/' changes from 4db8dae..ce5acc0 ce5acc0 Release specific fixes 63596f3 Release specific fixes c02b4be Release specific fixes ed0a9d8 Minor fix for release 2e186b4 Gzip L2: bug fix for list mode 5f95401 minor fix for release 43812e6 Merge branch 'next' of https://gitenterprise.xilinx.com/FaaSApps/xf_compression into next 2e8760d SC related changes 9f9bda7 Update latest QoR golden and README gen 27bf913 Use low latency gzip decompres for L2/demos/gzip 9526a92 Update tool version -> 2021.1 1b76477 clang format fix f7d85cf clang format fix cbd220e update tool version b4b6db2 GUI minor fix b79bb61 minor fix 2cab319 update READMEs 1ebad52 update READMEs 5580a36 update golden with latest 0f8fcc6 GUI and CLI compatibility for versal and DC: add data folder 1fa92b2 Merge branch 'master' of https://gitenterprise.xilinx.com/FaaSApps/xf_compression into next 4c71b25 disable hw_build b3a6118 updated with cases failed in PR-300 for zlib.cpp 10be73a enable low latency in gzipDemo b7dac71 Merge branch 'master' of https://gitenterprise.xilinx.com/FaaSApps/xf_compression into next 2351a05 gzipc bad alloc fix 8d562f4 Added all ZLIB variants and respective READMEs c30f59b 21.1 GUI fix 7e4da89 Makegen: generate 
xrt.ini with updated profiling flags in 2021.1 a65331d Merge branch 'master' of https://gitenterprise.xilinx.com/FaaSApps/xf_compression into next d23062f Minor fix in gzip_hbm 38a069f minor fix d875e3a minor fix 3112cd0 [Docs]: Release Notes added df7e75f minor fix f279b62 Merge branch 'relnote' of https://gitenterprise.xilinx.com/kalib/xf_compression into relnote 4696128 moved it under library overview af26aea Update release.rst 09b6ae3 updated release notes bc97d77 fixed zstd decompress to support multiframe e4ca0a2 [ZLIB Product]: Inflate extra copy optimization for libz 0e850b8 [Deflate Extra Copy]: Updated with latest master changes & synced 0290951 Merge branch 'master' of https://gitenterprise.xilinx.com/FaaSApps/xf_compression into next ea9ce9f GZip block mm: host KT issue e9fabbd updated makefile to remove pthread content 340c2cd Next branch regressions minor fixes b9dd82e Fixed mozilla issue + resource optimizations. 209346e fix broken links 7650d92 Merge branch 'master' of https://gitenterprise.xilinx.com/FaaSApps/xf_compression into next 6fb5430 Update run.sh with -mcr option for large file sample_run.txt 31a950d Merge branch 'master' of https://gitenterprise.xilinx.com/FaaSApps/xf_compression into next 3cafacd Merge branch 'master' of https://gitenterprise.xilinx.com/FaaSApps/xf_compression into next 37b047b fix utils.mk for master e030245 Added max CR option to handle from base class 5bd81ba fix utils.mk for master 457d79e CR 1100969 Minor fix cb52fc6 Merge branch 'master' of https://gitenterprise.xilinx.com/FaaSApps/xf_compression into next 452acfd lz4 compress/decompress is now std compliance and no original size use 9efbaaf Zstd compress: optimized sequence encoder and input distributer 6930dfc Updated GZIP MM design 76d0521 Merge branch 'master' of https://gitenterprise.xilinx.com/FaaSApps/xf_compression into next 6f573f7 libzso: Device selection & deflate bug 903f7c0 CR-1101226 change path of benchmark.rst d1dc255 Merge branch 'master' of 
https://gitenterprise.xilinx.com/FaaSApps/xf_compression into next 840935d Updated GZIP designs based on block sizes and updated READMEs for L1 & L2 git-subtree-dir: data_compression git-subtree-split: ce5acc03519dec3d6f0d8e0bc5d6361c5062a36b Co-authored-by: sdausr <sdausr@xilinx.com>
1 parent bc444c0 commit 859827d

484 files changed

Lines changed: 7157 additions & 2477 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

data_compression/L1/include/hw/checksum_wrapper.hpp

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -174,12 +174,19 @@ void checksum32(hls::stream<ap_uint<32> >& checksumInitStrm,
174174
template <int W>
175175
void checksum32(hls::stream<ap_uint<32> >& checksumInitStrm,
176176
hls::stream<ap_uint<8 * W> >& inStrm,
177-
hls::stream<ap_uint<32> >& inLenStrm,
178-
hls::stream<bool>& endInStrm,
177+
hls::stream<ap_uint<5> >& inLenStrm,
179178
hls::stream<ap_uint<32> >& outStrm,
180-
hls::stream<bool>& endOutStrm,
181179
hls::stream<ap_uint<2> >& checksumTypeStrm) {
180+
// Internal EOS Streams
181+
hls::stream<bool> endInStrm;
182+
hls::stream<bool> endOutStrm;
183+
#pragma HLS STREAM variable = endInStrm depth = 4
184+
#pragma HLS STREAM variable = endOutStrm depth = 4
185+
186+
checksum_loop:
182187
for (ap_uint<2> checksumType = checksumTypeStrm.read(); checksumType != 3; checksumType = checksumTypeStrm.read()) {
188+
endInStrm << false;
189+
endInStrm << true;
183190
// CRC
184191
if (checksumType == 1) {
185192
xf::security::crc32<W>(checksumInitStrm, inStrm, inLenStrm, endInStrm, outStrm, endOutStrm);
@@ -188,6 +195,8 @@ void checksum32(hls::stream<ap_uint<32> >& checksumInitStrm,
188195
else {
189196
xf::security::adler32<W>(checksumInitStrm, inStrm, inLenStrm, endInStrm, outStrm, endOutStrm);
190197
}
198+
endOutStrm.read();
199+
endOutStrm.read();
191200
}
192201
}
193202

data_compression/L1/include/hw/lz4_specs.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ const auto MAGIC_BYTE_1 = 4;
4949
const auto MAGIC_BYTE_2 = 34;
5050
const auto MAGIC_BYTE_3 = 77;
5151
const auto MAGIC_BYTE_4 = 24;
52-
const auto FLG_BYTE = 104;
52+
const auto FLG_BYTE = 100;
5353

5454
/**
5555
* This value is used to set

data_compression/L1/include/hw/lz_compress.hpp

Lines changed: 22 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -177,21 +177,26 @@ template <int MAX_INPUT_SIZE = 64 * 1024,
177177
int MATCH_LEN,
178178
int MIN_MATCH,
179179
int LZ_MAX_OFFSET_LIMIT,
180-
int CORE_ID = 0,
180+
int NUM_BLOCKS = 8,
181181
int MATCH_LEVEL = 6,
182182
int MIN_OFFSET = 1,
183183
int LZ_DICT_SIZE = 1 << 12,
184184
int LEFT_BYTES = 64>
185-
void lzCompress(hls::stream<IntVectorStream_dt<8, 1> >& inStream, hls::stream<IntVectorStream_dt<32, 1> >& outStream) {
185+
void lzCompress(hls::stream<IntVectorStream_dt<8, 1> >& inStream,
186+
hls::stream<IntVectorStream_dt<32, 1> >& outStream,
187+
uint8_t ii) {
186188
const uint16_t c_indxBitCnts = 24;
187189
const uint16_t c_fifo_depth = LEFT_BYTES + 2;
188190
const int c_dictEleWidth = (MATCH_LEN * 8 + c_indxBitCnts);
189191
typedef ap_uint<MATCH_LEVEL * c_dictEleWidth> uintDictV_t;
190192
typedef ap_uint<c_dictEleWidth> uintDict_t;
191193
const uint32_t totalDictSize = (1 << (c_indxBitCnts - 1)); // 8MB based on index 3 bytes
192-
static uint32_t relativeInSize = 0;
193-
static bool resetDictFlag = true;
194-
static uint32_t relativeNumBlocks = 0;
194+
static uint32_t relativeInSize[NUM_BLOCKS] = {0};
195+
static bool resetDictFlag[8] = {true, true, true, true, true, true, true, true};
196+
static uint32_t relativeNumBlocks[NUM_BLOCKS] = {0};
197+
#pragma HLS array_partition variable = relativeInSize
198+
#pragma HLS array_partition variable = resetDictFlag
199+
#pragma HLS array_partition variable = relativeNumBlocks
195200

196201
uintDictV_t dict[LZ_DICT_SIZE];
197202
#pragma HLS RESOURCE variable = dict core = XPM_MEMORY uram
@@ -208,11 +213,12 @@ void lzCompress(hls::stream<IntVectorStream_dt<8, 1> >& inStream, hls::stream<In
208213
// output register
209214
IntVectorStream_dt<32, 1> outValue;
210215
// loop over blocks
216+
211217
while (true) {
212218
uint32_t iIdx = 0;
213219
// once 8MB data is processed reset dictionary
214220
// 8MB based on index 3 bytes
215-
if (resetDictFlag) {
221+
if (resetDictFlag[ii]) {
216222
ap_uint<MATCH_LEVEL* c_dictEleWidth> resetValue = 0;
217223
for (int i = 0; i < MATCH_LEVEL; i++) {
218224
#pragma HLS UNROLL
@@ -225,11 +231,11 @@ void lzCompress(hls::stream<IntVectorStream_dt<8, 1> >& inStream, hls::stream<In
225231
#pragma HLS UNROLL FACTOR = 2
226232
dict[i] = resetValue;
227233
}
228-
resetDictFlag = false;
229-
relativeInSize = 0;
230-
relativeNumBlocks = 0;
234+
resetDictFlag[ii] = false;
235+
relativeInSize[ii] = 0;
236+
relativeNumBlocks[ii] = 0;
231237
} else {
232-
relativeNumBlocks++;
238+
relativeNumBlocks[ii]++;
233239
}
234240
// check if end of data
235241
auto nextVal = inStream.read();
@@ -245,7 +251,7 @@ void lzCompress(hls::stream<IntVectorStream_dt<8, 1> >& inStream, hls::stream<In
245251
inVal = nextVal;
246252
nextVal = inStream.read();
247253
present_window[++iIdx] = inVal.data[0];
248-
++relativeInSize;
254+
relativeInSize[ii]++;
249255
}
250256
// assuming that, at least bytes more than LEFT_BYTES will be present at the input
251257
lz_fill_circular_buf:
@@ -254,7 +260,7 @@ void lzCompress(hls::stream<IntVectorStream_dt<8, 1> >& inStream, hls::stream<In
254260
inVal = nextVal;
255261
nextVal = inStream.read();
256262
lclBufStream << inVal.data[0];
257-
++relativeInSize;
263+
relativeInSize[ii]++;
258264
}
259265
// lz_compress main
260266
outValue.strobe = 1;
@@ -265,12 +271,12 @@ void lzCompress(hls::stream<IntVectorStream_dt<8, 1> >& inStream, hls::stream<In
265271
#ifndef DISABLE_DEPENDENCE
266272
#pragma HLS dependence variable = dict inter false
267273
#endif
268-
uint32_t currIdx = (iIdx + (relativeNumBlocks * MAX_INPUT_SIZE)) - MATCH_LEN + 1;
274+
uint32_t currIdx = (iIdx + (relativeNumBlocks[ii] * MAX_INPUT_SIZE)) - MATCH_LEN + 1;
269275
// read from input stream into circular buffer
270276
auto inValue = lclBufStream.read(); // pop latest value from FIFO
271277
lclBufStream << nextVal.data[0]; // push latest read value to FIFO
272278
nextVal = inStream.read(); // read next value from input stream
273-
++relativeInSize;
279+
relativeInSize[ii]++;
274280

275281
// shift present window and load next value
276282
for (uint8_t m = 0; m < MATCH_LEN - 1; m++) {
@@ -313,7 +319,7 @@ void lzCompress(hls::stream<IntVectorStream_dt<8, 1> >& inStream, hls::stream<In
313319
}
314320
if ((len >= MIN_MATCH) && (currIdx > compareIdx) && ((currIdx - compareIdx) < LZ_MAX_OFFSET_LIMIT) &&
315321
((currIdx - compareIdx - 1) >= MIN_OFFSET) &&
316-
(compareIdx >= (relativeNumBlocks * MAX_INPUT_SIZE))) {
322+
(compareIdx >= (relativeNumBlocks[ii] * MAX_INPUT_SIZE))) {
317323
len = len;
318324
} else {
319325
len = 0;
@@ -344,7 +350,7 @@ void lzCompress(hls::stream<IntVectorStream_dt<8, 1> >& inStream, hls::stream<In
344350
}
345351

346352
// once relativeInSize becomes 8MB set the flag to true
347-
resetDictFlag = (relativeInSize >= (totalDictSize)) ? true : false;
353+
resetDictFlag[ii] = (relativeInSize[ii] >= (totalDictSize)) ? true : false;
348354
// end of block
349355
outValue.strobe = 0;
350356
outStream << outValue;

data_compression/L1/include/hw/s2mm.hpp

Lines changed: 35 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,6 @@ template <int DATAWIDTH, int BURST_SIZE, class OUTSIZE_DT = uint32_t>
6767
void stream2MM(ap_uint<DATAWIDTH>* out,
6868
uint32_t* checksumData,
6969
hls::stream<ap_uint<32> >& checksumStream,
70-
hls::stream<bool>& checksumEos,
7170
hls::stream<ap_uint<DATAWIDTH> >& inStream,
7271
hls::stream<bool>& endOfStream,
7372
hls::stream<OUTSIZE_DT>& outSize,
@@ -100,9 +99,7 @@ void stream2MM(ap_uint<DATAWIDTH>* out,
10099
output_size[0] = outSize.read();
101100

102101
// write checksum value to DDR
103-
bool eosFlag = checksumEos.read();
104-
if (!eosFlag) checksumData[0] = checksumStream.read();
105-
eosFlag = checksumEos.read();
102+
checksumData[0] = checksumStream.read();
106103
}
107104

108105
template <int BURST_SIZE, int DATAWIDTH, int NUM_BLOCK>
@@ -549,6 +546,40 @@ void s2mmEosStreamSimple(ap_uint<DATAWIDTH>* out, hls::stream<ap_uint<DATAWIDTH
549546
}
550547
}
551548

549+
template <int DATAWIDTH, int BURST_SIZE>
550+
void s2mmWithSize(ap_uint<DATAWIDTH>* out,
551+
hls::stream<ap_uint<DATAWIDTH + 8> >& inStream,
552+
const uint32_t index,
553+
uint32_t* decSize,
554+
hls::stream<uint32_t>& decSizeStream) {
555+
/**
556+
* @brief Drains inStream into out until a word flagged as last arrives,
557+
* then stores one value read from decSizeStream into decSize[index].
558+
*
559+
* @tparam DATAWIDTH bit width of each word written to out
560+
* @tparam BURST_SIZE number of words handled per pass of the inner loop
561+
* @param out output memory address
562+
* @param inStream input stream; bits [DATAWIDTH-1:0] of each word carry the
563+
* data, and a non-zero value in the 8 bits above them marks the last word
564+
* @param index position in decSize that receives the size value
* @param decSize destination array for the size value
* @param decSizeStream stream from which the size value is read once,
* after the data loop has finished
565+
*/
566+
567+
bool eos = false;
568+
ap_uint<DATAWIDTH + 8> dummy = 0;
569+
s2mmWithSize:
// eos is only re-checked between bursts, so a burst that has started always
// runs all BURST_SIZE iterations.
570+
for (int j = 0; eos == false; j += BURST_SIZE) {
571+
for (int i = 0; i < BURST_SIZE; i++) {
572+
#pragma HLS PIPELINE II = 1
// Once eos is set the stream is no longer read; the zero-valued dummy is
// used instead, so the remainder of the burst writes zeros to out.
573+
ap_uint<DATAWIDTH + 8> inValue = (eos == true) ? dummy : inStream.read();
// Any non-zero value in the upper 8 bits marks this word as the last one.
574+
bool eos_tmp = (eos == true) ? true : inValue.range(DATAWIDTH + 7, DATAWIDTH);
575+
ap_uint<DATAWIDTH> outValue = inValue.range(DATAWIDTH - 1, 0);
// The write is unconditional, so the word carrying the end flag is stored too.
576+
out[j + i] = outValue;
577+
eos = eos_tmp;
578+
}
579+
}
// The single size value is consumed only after the data loop has exited.
580+
decSize[index] = decSizeStream.read();
581+
}
582+
552583
template <int DATAWIDTH, int BURST_SIZE, class OUTSIZE_DT = uint32_t>
553584
void s2mmEosSimple(ap_uint<DATAWIDTH>* out,
554585
hls::stream<ap_uint<DATAWIDTH> >& inStream,

data_compression/L1/include/hw/stream_downsizer.hpp

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -138,6 +138,46 @@ void bufferDownsizer(hls::stream<ap_uint<IN_DATAWIDTH + SIZE_DWIDTH> >& inStream
138138
outStream << outVal;
139139
}
140140

141+
/**
 * @brief Splits sized input words into OUT_DATAWIDTH-bit slices and emits
 * each slice as a byte vector with a strobe.
 *
 * Each input word holds a size in bits [SIZE_DWIDTH-1:0] and data starting at
 * bit SIZE_DWIDTH. The size is decremented by the number of bytes emitted, so
 * it counts bytes. A word whose size is 0 ends the current block (an output
 * with strobe 0 is written); a size-0 word read at the start of a block ends
 * the function, after which one more strobe-0 output is written.
 *
 * @tparam IN_DATAWIDTH  number of data bits in each input word
 * @tparam OUT_DATAWIDTH number of data bits per output word (OUT_DATAWIDTH / 8 bytes)
 * @tparam SIZE_DWIDTH   number of low bits in each input word that hold the size
 * @param inStream  input stream of (data, size) words
 * @param outStream output stream of byte vectors; strobe is the number of
 *                  bytes still owed for the current input word, capped at
 *                  OUT_DATAWIDTH / 8, and 0 for the end markers
 */
template <int IN_DATAWIDTH, int OUT_DATAWIDTH, int SIZE_DWIDTH = 4>
142+
void bufferDownsizerVec(hls::stream<ap_uint<IN_DATAWIDTH + SIZE_DWIDTH> >& inStream,
143+
hls::stream<IntVectorStream_dt<8, OUT_DATAWIDTH / 8> >& outStream) {
// NOTE(review): c_factor is not referenced anywhere in this function.
144+
constexpr uint16_t c_factor = IN_DATAWIDTH / OUT_DATAWIDTH;
// Number of bytes carried by one output word.
145+
constexpr uint8_t c_outWord = OUT_DATAWIDTH / 8;
// Highest bit of the first data slice inside an input word.
// NOTE(review): held in a uint8_t, so OUT_DATAWIDTH + SIZE_DWIDTH - 1 needs to
// stay below 256 — confirm for the widths this is instantiated with.
146+
constexpr uint8_t c_outDataHigh = OUT_DATAWIDTH + SIZE_DWIDTH - 1;
147+
IntVectorStream_dt<8, c_outWord> outVal;
148+
149+
downsizer_top:
150+
while (1) {
151+
auto inVal = inStream.read();
152+
// proceed further if valid size
153+
ap_uint<SIZE_DWIDTH> inSize = inVal.range(SIZE_DWIDTH - 1, 0);
// A size of 0 on the first word of a block terminates the outer loop.
154+
if (inSize == 0) break;
155+
downsizer_assign:
156+
while (inSize > 0) {
157+
#pragma HLS PIPELINE II = 1
158+
ap_uint<OUT_DATAWIDTH> outReg = inVal.range(c_outDataHigh, SIZE_DWIDTH);
// Move the next slice into position; the size bits are shifted out here,
// which is fine because they were already copied into inSize.
159+
inVal >>= OUT_DATAWIDTH;
160+
outVal.strobe = ((inSize < c_outWord) ? (uint8_t)inSize : c_outWord);
// All c_outWord bytes are copied regardless of strobe.
161+
for (uint8_t i = 0; i < c_outWord; ++i) {
162+
#pragma HLS UNROLL
163+
outVal.data[i] = outReg.range((i * 8) + 7, i * 8);
164+
}
165+
outStream << outVal;
166+
inSize -= outVal.strobe;
// Current word exhausted: fetch the next one inside the same loop. A size of 0
// on that word makes the loop condition fail and ends the block.
167+
if (inSize == 0) {
168+
inVal = inStream.read();
169+
inSize = inVal.range(SIZE_DWIDTH - 1, 0);
170+
}
171+
}
172+
// Block end Condition
173+
outVal.strobe = 0;
174+
outStream << outVal;
175+
}
176+
// File end Condition
177+
outVal.strobe = 0;
178+
outStream << outVal;
179+
}
180+
141181
template <int IN_DATAWIDTH, int OUT_DATAWIDTH>
142182
void simpleStreamDownSizer(hls::stream<ap_uint<IN_DATAWIDTH> >& inStream,
143183
hls::stream<uint16_t>& inSizeStream,

0 commit comments

Comments
 (0)