From f6efa833613234d297a96ad038a220ac7e5818be Mon Sep 17 00:00:00 2001 From: auphelia Date: Thu, 18 Apr 2024 15:23:29 +0100 Subject: [PATCH 01/16] [Deconv] Initial draft of deconv node --- fetch-repos.sh | 3 +- src/finn/custom_op/fpgadataflow/__init__.py | 4 +- .../custom_op/fpgadataflow/deconvolution.py | 167 +++++++++ .../custom_op/fpgadataflow/hls/__init__.py | 2 + .../fpgadataflow/hls/deconvolution_hls.py | 348 ++++++++++++++++++ src/finn/util/data_packing.py | 51 +++ .../fpgadataflow/test_fpgadataflow_deconv.py | 65 ++++ 7 files changed, 638 insertions(+), 2 deletions(-) create mode 100644 src/finn/custom_op/fpgadataflow/deconvolution.py create mode 100644 src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py diff --git a/fetch-repos.sh b/fetch-repos.sh index 073c052d67..45e0effb94 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -1,5 +1,6 @@ #!/bin/bash # Copyright (c) 2020-2022, Xilinx, Inc. +# Copyright (C) 2022-2024, Advanced Micro Devices, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -32,7 +33,7 @@ FINN_EXP_COMMIT="de99347e936d51715f5356a1b6c64e37b91c23c2" BREVITAS_COMMIT="84f42259ec869eb151af4cb8a8b23ad925f493db" PYVERILATOR_COMMIT="ce0a08c20cb8c1d1e84181d6f392390f846adbd1" CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4" -HLSLIB_COMMIT="16e5847a5e3ef76cffe84c8fad2f010d593457d3" +HLSLIB_COMMIT="e80d94ca1f28b80476b0289d154d239976c25fef" OMX_COMMIT="0b59762f9e4c4f7e5aa535ee9bc29f292434ca7a" AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b" XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index aed2ab7fe1..a22831ca37 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -1,5 +1,5 @@ # Copyright (C) 2020-2022, Xilinx, Inc. -# Copyright (C) 2023-2024, Advanced Micro Devices, Inc. +# Copyright (C) 2022-2024, Advanced Micro Devices, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -33,6 +33,7 @@ from finn.custom_op.fpgadataflow.convolutioninputgenerator import ( ConvolutionInputGenerator, ) +from finn.custom_op.fpgadataflow.deconvolution import Deconvolution from finn.custom_op.fpgadataflow.downsampler import DownSampler from finn.custom_op.fpgadataflow.duplicatestreams import DuplicateStreams from finn.custom_op.fpgadataflow.fmpadding import FMPadding @@ -68,6 +69,7 @@ custom_op["AddStreams"] = AddStreams custom_op["ChannelwiseOp"] = ChannelwiseOp custom_op["ConvolutionInputGenerator"] = ConvolutionInputGenerator +custom_op["Deconvolution"] = Deconvolution custom_op["DownSampler"] = DownSampler custom_op["DuplicateStreams"] = DuplicateStreams custom_op["FMPadding"] = FMPadding diff --git a/src/finn/custom_op/fpgadataflow/deconvolution.py b/src/finn/custom_op/fpgadataflow/deconvolution.py new file mode 100644 index 0000000000..0e14bea641 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/deconvolution.py @@ -0,0 +1,167 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import numpy as np +import warnings +from qonnx.core.datatype import DataType + +from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp + + +class Deconvolution(HWCustomOp): + """Abstraction layer for HW implementation of Deconvolution""" + + def __init__(self, onnx_node, **kwargs): + super().__init__(onnx_node, **kwargs) + + def get_nodeattr_types(self): + my_attrs = { + "KernelDim": ("ints", True, []), # [H, W] = [Y, X] + "IFMChannels": ("i", True, 0), + "OFMChannels": ("i", True, 0), + "IFMDim": ("ints", True, []), # [H, W] = [Y, X] + "PE": ("i", True, 0), + "SIMD": ("i", True, 0), + "Stride": ("ints", True, [1, 1]), # [H, W] = [Y, X] + # FINN DataTypes for inputs, weights, outputs + "inputDataType": ("s", True, ""), + "weightDataType": ("s", True, ""), + "outputDataType": ("s", True, ""), + } + my_attrs.update(super().get_nodeattr_types()) + return my_attrs + + def get_normal_input_shape(self, ind=0): + ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim") + ifm_ch = self.get_nodeattr("IFMChannels") + ishape = (1, ifm_dim_h, ifm_dim_w, ifm_ch) + return ishape + + def get_folded_input_shape(self, ind=0): + ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim") + ifm_ch = self.get_nodeattr("IFMChannels") + simd = self.get_nodeattr("SIMD") + assert ifm_ch % simd == 0, "SIMD must divide IFMChannels" + fold = int(ifm_ch / simd) + folded_ishape = (1, ifm_dim_h, ifm_dim_w, fold, simd) + return folded_ishape + + def get_normal_output_shape(self, ind=0): + idim_h, idim_w = self.get_nodeattr("IFMDim") + stride_h, stride_w = self.get_nodeattr("Stride") + k_h, k_w = self.get_nodeattr("KernelDim") + ofm_ch = self.get_nodeattr("OFMChannels") + pad_h = int(k_h / stride_h) - 1 + pad_w = int(k_w / stride_w) - 1 + odim_h = (idim_h - 1) * stride_h - 2 * pad_h + (k_h - 1) + 1 + odim_w = (idim_w - 1) * stride_w - 2 * pad_w + (k_w - 1) + 1 + oshape = (1, odim_h, odim_w, ofm_ch) + return oshape + + def get_folded_output_shape(self, ind=0): + normal_oshape = self.get_normal_output_shape() + odim_h = normal_oshape[1] + odim_w = normal_oshape[2] + ofm_ch = normal_oshape[3] + pe = self.get_nodeattr("PE") + fold = int(ofm_ch / pe) + folded_oshape = (1, odim_h, odim_w, fold, pe) + return folded_oshape + + def make_shape_compatible_op(self, model): + exp_ishape = self.get_normal_input_shape() + oshape = self.get_normal_output_shape() + ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0])) + assert ishape == exp_ishape, "Unexpected input shape for Deconv." + # implement tensor with correct shape + return super().make_const_shape_op(oshape) + + def infer_node_datatype(self, model): + node = self.onnx_node + idt = model.get_tensor_datatype(node.input[0]) + if idt != self.get_input_datatype(): + warn_str = "inputDataType changing for %s: %s -> %s " % ( + node.name, + str(self.get_input_datatype()), + str(idt), + ) + warnings.warn(warn_str) + self.set_nodeattr("inputDataType", idt.name) + # set output datatype from property + odt = self.get_output_datatype() + model.set_tensor_datatype(node.output[0], odt) + + def verify_node(self): + pass + + def get_input_datatype(self, ind=0): + """Returns FINN DataType of input.""" + return DataType[self.get_nodeattr("inputDataType")] + + def get_weight_datatype(self): + """Returns FINN DataType of weights.""" + return DataType[self.get_nodeattr("weightDataType")] + + def get_output_datatype(self, ind=0): + """Returns FINN DataType of output.""" + return DataType[self.get_nodeattr("outputDataType")] + + def get_instream_width(self, ind=0): + """Returns stream width, input and output stream width are equal for + the sliding window function""" + ibits = self.get_input_datatype().bitwidth() + simd = self.get_nodeattr("SIMD") + ifm_ch = self.get_nodeattr("IFMChannels") + assert ifm_ch % simd == 0, "SIMD must divide IFMChannels" + in_width = simd * ibits + return in_width + + def get_outstream_width(self, ind=0): + o_bits = self.get_output_datatype().bitwidth() + out_width = o_bits * self.get_nodeattr("PE") + return out_width + + def get_number_output_values(self): + folded_oshape = self.get_folded_output_shape() + num_output_elems = np.prod(folded_oshape[:-1]) + return num_output_elems + + def get_exp_cycles(self): + return 0 + + def bram_estimation(self): + return 0 + + def lut_estimation(self): + return 0 + + def uram_estimation(self): + return 0 + + def execute_node(self, context, graph): + pass diff --git a/src/finn/custom_op/fpgadataflow/hls/__init__.py b/src/finn/custom_op/fpgadataflow/hls/__init__.py index 405c47a08d..c91541cab7 100644 --- a/src/finn/custom_op/fpgadataflow/hls/__init__.py +++ b/src/finn/custom_op/fpgadataflow/hls/__init__.py @@ -33,6 +33,7 @@ from finn.custom_op.fpgadataflow.hls.convolutioninputgenerator_hls import ( ConvolutionInputGenerator_hls, ) +from finn.custom_op.fpgadataflow.hls.deconvolution_hls import Deconvolution_hls from finn.custom_op.fpgadataflow.hls.downsampler_hls import DownSampler_hls from finn.custom_op.fpgadataflow.hls.duplicatestreams_hls import DuplicateStreams_hls from finn.custom_op.fpgadataflow.hls.fmpadding_hls import FMPadding_hls @@ -61,6 +62,7 @@ custom_op["ChannelwiseOp_hls"] = ChannelwiseOp_hls custom_op["CheckSum_hls"] = CheckSum_hls custom_op["ConvolutionInputGenerator_hls"] = ConvolutionInputGenerator_hls +custom_op["Deconvolution_hls"] = Deconvolution_hls custom_op["DownSampler_hls"] = DownSampler_hls custom_op["DuplicateStreams_hls"] = DuplicateStreams_hls custom_op["FMPadding_hls"] = FMPadding_hls diff --git a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py new file mode 100644 index 0000000000..0448f04b07 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py @@ -0,0 +1,348 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import numpy as np +import os +from qonnx.core.datatype import DataType +from qonnx.util.basic import interleave_matrix_outer_dim_from_partitions + +from finn.custom_op.fpgadataflow.deconvolution import Deconvolution +from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend +from finn.util.data_packing import ( + npy_to_rtlsim_input, + numpy_to_hls_code1, + rtlsim_output_to_npy, +) + + +class Deconvolution_hls(Deconvolution, HLSBackend): + """Corresponds to finn-hlslib deconv function.""" + + def __init__(self, onnx_node, **kwargs): + super().__init__(onnx_node, **kwargs) + + def get_nodeattr_types(self): + my_attrs = {} + my_attrs.update(Deconvolution.get_nodeattr_types(self)) + my_attrs.update(HLSBackend.get_nodeattr_types(self)) + return my_attrs + + def calc_wmem(self): + """Calculates and returns WMEM.""" + ifm_ch = self.get_nodeattr("IFMChannels") + ofm_ch = self.get_nodeattr("OFMChannels") + kernel_2 = np.prod(self.get_nodeattr("KernelDim")) + pe = self.get_nodeattr("PE") + simd = self.get_nodeattr("SIMD") + assert ofm_ch % pe == 0, "Requirement output channels divisable by PE is violated." + assert ifm_ch % simd == 0, "Requirement input channels divisable by SIMD is violated." + wmem = (ofm_ch / pe) * kernel_2 * (ifm_ch / simd) + return int(wmem) + + def generate_params(self, model, path): + code_gen_dir = path + # weights, if not external + weights = model.get_initializer(self.onnx_node.input[1]) + # save hlslib-compatible weights in params.h + weight_filename = "{}/params.h".format(code_gen_dir) + self.make_weight_file(weights, "hls_header", weight_filename) + + def make_weight_file(self, weights, weight_file_mode, weight_file_name): + """Produce a file containing given weights in appropriate format for this + layer. This file can be used for either synthesis or run-time reconfig + of weights. + + Arguments: + + * weights : numpy array with weights to be put into the file + * weight_file_mode : one of {hls_header, decoupled_verilog_dat, + decoupled_runtime} + * weight_file_name : filename for the weight file to be generated + + """ + # convert weights into hlslib-compatible format + weight_tensor = self.get_hw_compatible_weight_tensor(weights) + export_wdt = self.get_weight_datatype() + if weight_file_mode == "hls_header": + weight_hls_code = numpy_to_hls_code1(weight_tensor, export_wdt, "weights", True, True) + # write weights into C++ header file as dictated by finn-hlslib + f_weights = open(weight_file_name, "w") + f_weights.write( + "static hls::vector, {}> const weights[{}] = ".format( + export_wdt.get_hls_datatype_str(), + self.get_nodeattr("SIMD"), + self.get_nodeattr("PE"), + self.calc_wmem(), + ) + ) + f_weights.write(weight_hls_code) + f_weights.close() + + def get_hw_compatible_weight_tensor(self, orig_weight_matrix): + """Convert the original numpy weight matrix orig_weight_matrix into + a form suitable for passing to the hlslib call: + * ensure OCH % PE == 0 and ICH % SIMD == 0 + * interleave rows between PEs + * reshape into (1, PE, WMEM, SIMD) and return + """ + k_h, k_w = self.get_nodeattr("KernelDim") + ifm_ch = self.get_nodeattr("IFMChannels") + ofm_ch = self.get_nodeattr("OFMChannels") + pe = self.get_nodeattr("PE") + simd = self.get_nodeattr("SIMD") + wmem = self.calc_wmem() + assert orig_weight_matrix.shape == ( + k_h * k_w * ifm_ch, + ofm_ch, + ), """Weights matrix doesn't + have expected shape (k_h*k_w*ifm_ch, ofm_ch)""" + assert ofm_ch % pe == 0, "Requirement output channels divisable by PE is violated." + assert ifm_ch % simd == 0, "Requirement input channels divisable by SIMD is violated." + # start by transposing the original weight matrix, since ONNX and + # finn-hlslib use different assumptions + # ONNX uses (in_features, out_features) and matmul(x, W) + # finn-hlslib uses (out_features, in_features) and matmul(W, x) + ret = orig_weight_matrix.T + # interleave rows between PEs and reshape + # distribute rows between PEs + ret = interleave_matrix_outer_dim_from_partitions(ret, pe) + # create SIMD as innermost dimension and add a dummy outer dim + ret = ret.reshape(1, pe, wmem, simd) + # reverse the SIMD dimension + ret = np.flip(ret, axis=-1) + return ret + + def global_includes(self): + # self.code_gen_dict["$GLOBALS$"] = ['#include "weights.hpp"'] + # self.code_gen_dict["$GLOBALS$"] += ['#include "activations.hpp"'] + + self.code_gen_dict["$GLOBALS$"] = ['#include "deconv.hpp"'] + + def defines(self, var): + ifm_dim = self.get_nodeattr("IFMDim") + self.code_gen_dict["$DEFINES$"] = [ + """#define Kernel {}\n #define Stride {}\n + #define IFMH {}\n #define IFMW {}\n #define ICH {}\n + #define OCH {}\n #define SIMD1 {}\n #define PE1 {}""".format( + self.get_nodeattr("KernelDim")[0], + self.get_nodeattr("Stride")[0], + ifm_dim[0], + ifm_dim[1], + self.get_nodeattr("IFMChannels"), + self.get_nodeattr("OFMChannels"), + self.get_nodeattr("SIMD"), + self.get_nodeattr("PE"), + ) + ] + + def read_npy_data(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + dtype = self.get_input_datatype() + elem_hls_type = dtype.get_hls_datatype_str() + simd = self.get_nodeattr("SIMD") + npy_type = "float" + npy_in = "%s/input_0.npy" % code_gen_dir + self.code_gen_dict["$READNPYDATA$"] = [] + # note: the innermost dim is reversed for the input + self.code_gen_dict["$READNPYDATA$"].append( + 'npy2apintvectorstream<%s, %s, %d>("%s", in0_%s, false);' + % ( + elem_hls_type, + npy_type, + simd, + npy_in, + self.hls_sname(), + ) + ) + + def strm_decl(self): + idtype = self.get_input_datatype() + odtype = self.get_output_datatype() + simd = self.get_nodeattr("SIMD") + pe = self.get_nodeattr("PE") + self.code_gen_dict["$STREAMDECLARATIONS$"] = [] + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream> in0_{} ("in0_{}");'.format( + idtype.get_hls_datatype_str(), simd, self.hls_sname(), self.hls_sname() + ) + ) + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream> out_{} ("out_{}");'.format( + odtype.get_hls_datatype_str(), pe, self.hls_sname(), self.hls_sname() + ) + ) + + def docompute(self): + self.code_gen_dict["$DOCOMPUTE$"] = [ + """deconv + (weights, in0_{}, out_{});""".format( + self.hls_sname(), + self.hls_sname(), + ) + ] + + def dataoutstrm(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + pe = self.get_nodeattr("PE") + dtype = self.get_output_datatype() + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_out = "%s/output.npy" % code_gen_dir + shape = self.get_folded_output_shape() + shape_cpp_str = str(shape).replace("(", "{").replace(")", "}") + + # note: the innermost dim is not reversed for the output + self.code_gen_dict["$DATAOUTSTREAM$"] = [ + 'apintvectorstream2npy<%s, %s, %d>(out_%s, %s, "%s", false);' + % ( + elem_hls_type, + npy_type, + pe, + self.hls_sname(), + shape_cpp_str, + npy_out, + ) + ] + + def save_as_npy(self): + self.code_gen_dict["$SAVEASCNPY$"] = [] + + def blackboxfunction(self): + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ + """void {}(hls::stream> &in0_{}, + hls::stream> &out_{} + )""".format( + self.onnx_node.name, + self.get_instream_width(), + self.hls_sname(), + self.get_outstream_width(), + self.hls_sname(), + ) + ] + + def pragmas(self): + self.code_gen_dict["$PRAGMAS$"] = [ + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() + ] + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() + ) + self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE ap_ctrl_none port=return") + + self.code_gen_dict["$PRAGMAS$"].append('#include "params.h"') + # the weight tensor is ap_uint [PE][WMEM] + # partition for parallel access along the PE dimension (dim 1) + # self.code_gen_dict["$PRAGMAS$"].append( + # ("#pragma HLS ARRAY_PARTITION variable=weights.m_weights " "complete dim=1") + # ) + + def execute_node(self, context, graph): + mode = self.get_nodeattr("exec_mode") + node = self.onnx_node + + # TODO ensure codegen dir exists + if mode == "cppsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + elif mode == "rtlsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + else: + raise Exception( + """Invalid value for attribute exec_mode! Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + # create a npy file fore each input of the node (in_ind is input index) + in_ind = 0 + for inputs in node.input: + # it is assumed that the first input of the node is the data input + # the second input are the weights + # the third input are the thresholds + if in_ind == 0: + assert ( + str(context[inputs].dtype) == "float32" + ), """Input datatype is + not float32 as expected.""" + expected_inp_shape = self.get_folded_input_shape() + reshaped_input = context[inputs].reshape(expected_inp_shape) + if self.get_input_datatype() == DataType["BIPOLAR"]: + # store bipolar activations as binary + reshaped_input = (reshaped_input + 1) / 2 + export_idt = DataType["BINARY"] + else: + export_idt = self.get_input_datatype() + # make copy before saving the array + reshaped_input = reshaped_input.copy() + np.save( + os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)), + reshaped_input, + ) + elif in_ind > 2: + raise Exception("Unexpected input found for MatrixVectorActivation") + in_ind += 1 + + if mode == "cppsim": + # execute the precompiled model + super().exec_precompiled_singlenode_model() + # load output npy file + super().npy_to_dynamic_output(context) + # reinterpret binary output as bipolar where needed + if self.get_output_datatype() == DataType["BIPOLAR"]: + out = context[node.output[0]] + out = 2 * out - 1 + context[node.output[0]] = out + assert ( + context[node.output[0]].shape == self.get_normal_output_shape() + ), "cppsim did not produce expected output shape" + elif mode == "rtlsim": + sim = self.get_rtlsim() + nbits = self.get_instream_width() + inp = npy_to_rtlsim_input("{}/input_0.npy".format(code_gen_dir), export_idt, nbits) + self.reset_rtlsim(sim) + self.toggle_clk(sim) + output = self.rtlsim(sim, inp) + odt = self.get_output_datatype() + target_bits = odt.bitwidth() + packed_bits = self.get_outstream_width() + out_npy_path = "{}/output.npy".format(code_gen_dir) + out_shape = self.get_folded_output_shape() + rtlsim_output_to_npy(output, out_npy_path, odt, out_shape, packed_bits, target_bits) + + # load and reshape output + output = np.load(out_npy_path) + oshape = self.get_normal_output_shape() + output = np.asarray([output], dtype=np.float32).reshape(*oshape) + context[node.output[0]] = output + else: + raise Exception( + """Invalid value for attribute exec_mode! Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) diff --git a/src/finn/util/data_packing.py b/src/finn/util/data_packing.py index 7698850029..f23f409f39 100644 --- a/src/finn/util/data_packing.py +++ b/src/finn/util/data_packing.py @@ -281,6 +281,57 @@ def elem2str(x): return ret +def numpy_to_hls_code1(ndarray, dtype, hls_var_name, pack_innermost_dim=True, no_decl=False): + """Return C++ code representation of a numpy ndarray with FINN DataType + dtype, using hls_var_name as the resulting C++ variable name. If + pack_innermost_dim is specified, the innermost dimension of the ndarray + will be packed into a hex string using array2hexstring. If no_decl is + set to True, no variable name and type will be generated as part of the + emitted string. + """ + hls_dtype = dtype.get_hls_datatype_str() + if type(ndarray) != np.ndarray or ndarray.dtype != np.float32: + # try to convert to a float numpy array (container dtype is float) + ndarray = np.asarray(ndarray, dtype=np.float32) + if pack_innermost_dim: + idimlen = ndarray.shape[-1] + idimbits = idimlen * dtype.bitwidth() + idimbits = roundup_to_integer_multiple(idimbits, 4) + ndarray = pack_innermost_dim_as_hex_string(ndarray, dtype, idimbits) + hls_dtype = "ap_uint<%d>" % idimbits + ndims = ndarray.ndim + # add type string and variable name + # e.g. "const ap_uint<64>" "weightMem0" + ret = "%s %s" % (hls_dtype, hls_var_name) + # add dimensions + for d in range(ndims): + ret += "[%d]" % ndarray.shape[d] + orig_printops = np.get_printoptions() + np.set_printoptions(threshold=sys.maxsize) + + # define a function to convert a single element into a C++ init string + # a single element can be a hex string if we are using packing + def elem2str(x): + if type(x) == str or type(x) == np.str_: + return '{{"%s",},}' % (x) + elif type(x) == np.float32: + if dtype.is_integer(): + return str(int(x)) + else: + return str(x) + else: + raise Exception("Unsupported type for numpy_to_hls_code") + + strarr = np.array2string(ndarray, separator=", ", formatter={"all": elem2str}) + np.set_printoptions(**orig_printops) + strarr = strarr.replace("[", "{").replace("]", "}") + if no_decl: + ret = strarr + ";" + else: + ret = ret + " = \n" + strarr + ";" + return ret + + def npy_to_rtlsim_input(input_file, input_dtype, pad_to_nbits, reverse_inner=True): """Convert the multidimensional NumPy array of integers (stored as floats) from input_file into a flattened sequence of Python arbitrary-precision diff --git a/tests/fpgadataflow/test_fpgadataflow_deconv.py b/tests/fpgadataflow/test_fpgadataflow_deconv.py index f1fc989066..981430db81 100644 --- a/tests/fpgadataflow/test_fpgadataflow_deconv.py +++ b/tests/fpgadataflow/test_fpgadataflow_deconv.py @@ -123,6 +123,71 @@ def set_up_reference_model(idt, wdt, k, idim, ifm_ch, ofm_ch, stride, padding): return model +def create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding): + idim_h, idim_w = idim + stride_h, stride_w = stride + odim_h = (idim_h - 1) * stride_h - 2 * padding + (k - 1) + 1 + odim_w = (idim_w - 1) * stride_w - 2 * padding + (k - 1) + 1 + + inp = helper.make_tensor_value_info( + "inp", + TensorProto.FLOAT, + [ + 1, + idim_h, + idim_w, + ifm_ch, + ], + ) + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, odim_h, odim_w, ofm_ch]) + + W = helper.make_tensor_value_info("W", TensorProto.FLOAT, [ifm_ch * k * k, ofm_ch]) + + Deconv = helper.make_node( + "Deconvolution_hls", + ["inp", "W"], + ["outp"], + domain="finn.custom_op.fpgadataflow.hls", + backend="fpgadataflow", + KernelDim=[k, k], + IFMChannels=ifm_ch, + OFMChannels=ofm_ch, + IFMDim=idim, + Stride=[stride_h, stride_w], + PE=1, + SIMD=1, + inputDataType=idt.name, + weightDataType=wdt.name, + outputDataType=odt.name, + ) + + node_list = [Deconv] + value_info = [W] + + graph = helper.make_graph( + nodes=node_list, + name="convtranspose_graph", + inputs=[inp], + outputs=[outp], + value_info=value_info, + ) + + model = qonnx_make_model(graph, producer_name="convtranspose-model") + model = ModelWrapper(model) + + # initialize model + model.set_tensor_datatype("inp", idt) + model.set_tensor_datatype(model.graph.output[0].name, odt) + model.set_tensor_datatype("W", wdt) + + w_tensor = gen_finn_dt_tensor(wdt, [ifm_ch * k * k, ofm_ch]) + model.set_initializer("W", w_tensor) + + model = model.transform(InferShapes()) + + return model + + # input image dimension @pytest.mark.parametrize("idim", [[8, 8], [10, 8]]) # number of rows and number of cols to add From db9130b041feddf349cc976211a04a27a0dd9f76 Mon Sep 17 00:00:00 2001 From: auphelia Date: Thu, 25 Apr 2024 15:29:30 +0100 Subject: [PATCH 02/16] [Deconv] Add padding and cleanup hls code generation --- fetch-repos.sh | 2 +- .../custom_op/fpgadataflow/deconvolution.py | 4 +- .../fpgadataflow/hls/deconvolution_hls.py | 54 ++++++++++--------- src/finn/custom_op/fpgadataflow/templates.py | 1 + src/finn/util/data_packing.py | 4 +- 5 files changed, 36 insertions(+), 29 deletions(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index 45e0effb94..0c4a4a9349 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -33,7 +33,7 @@ FINN_EXP_COMMIT="de99347e936d51715f5356a1b6c64e37b91c23c2" BREVITAS_COMMIT="84f42259ec869eb151af4cb8a8b23ad925f493db" PYVERILATOR_COMMIT="ce0a08c20cb8c1d1e84181d6f392390f846adbd1" CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4" -HLSLIB_COMMIT="e80d94ca1f28b80476b0289d154d239976c25fef" +HLSLIB_COMMIT="d56b1d0c1eeb844a873fb29a29240a86e00d9f80" OMX_COMMIT="0b59762f9e4c4f7e5aa535ee9bc29f292434ca7a" AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b" XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" diff --git a/src/finn/custom_op/fpgadataflow/deconvolution.py b/src/finn/custom_op/fpgadataflow/deconvolution.py index 0e14bea641..8dd33fea83 100644 --- a/src/finn/custom_op/fpgadataflow/deconvolution.py +++ b/src/finn/custom_op/fpgadataflow/deconvolution.py @@ -48,6 +48,7 @@ def get_nodeattr_types(self): "PE": ("i", True, 0), "SIMD": ("i", True, 0), "Stride": ("ints", True, [1, 1]), # [H, W] = [Y, X] + "Padding": ("ints", True, []), # [H, W] = [Y, X] # FINN DataTypes for inputs, weights, outputs "inputDataType": ("s", True, ""), "weightDataType": ("s", True, ""), @@ -76,8 +77,7 @@ def get_normal_output_shape(self, ind=0): stride_h, stride_w = self.get_nodeattr("Stride") k_h, k_w = self.get_nodeattr("KernelDim") ofm_ch = self.get_nodeattr("OFMChannels") - pad_h = int(k_h / stride_h) - 1 - pad_w = int(k_w / stride_w) - 1 + pad_h, pad_w = self.get_nodeattr("Padding") odim_h = (idim_h - 1) * stride_h - 2 * pad_h + (k_h - 1) + 1 odim_w = (idim_w - 1) * stride_w - 2 * pad_w + (k_w - 1) + 1 oshape = (1, odim_h, odim_w, ofm_ch) diff --git a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py index 0448f04b07..8931d818d4 100644 --- a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py @@ -93,11 +93,11 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name): # write weights into C++ header file as dictated by finn-hlslib f_weights = open(weight_file_name, "w") f_weights.write( - "static hls::vector, {}> const weights[{}] = ".format( + "static {} const weights[{}][{}][{}] = ".format( export_wdt.get_hls_datatype_str(), - self.get_nodeattr("SIMD"), - self.get_nodeattr("PE"), self.calc_wmem(), + self.get_nodeattr("PE"), + self.get_nodeattr("SIMD"), ) ) f_weights.write(weight_hls_code) @@ -117,19 +117,15 @@ def get_hw_compatible_weight_tensor(self, orig_weight_matrix): simd = self.get_nodeattr("SIMD") wmem = self.calc_wmem() assert orig_weight_matrix.shape == ( - k_h * k_w * ifm_ch, ofm_ch, + k_h * k_w * ifm_ch, ), """Weights matrix doesn't - have expected shape (k_h*k_w*ifm_ch, ofm_ch)""" + #have expected shape (k_h*k_w*ifm_ch, ofm_ch)""" assert ofm_ch % pe == 0, "Requirement output channels divisable by PE is violated." assert ifm_ch % simd == 0, "Requirement input channels divisable by SIMD is violated." - # start by transposing the original weight matrix, since ONNX and - # finn-hlslib use different assumptions - # ONNX uses (in_features, out_features) and matmul(x, W) - # finn-hlslib uses (out_features, in_features) and matmul(W, x) - ret = orig_weight_matrix.T # interleave rows between PEs and reshape # distribute rows between PEs + ret = orig_weight_matrix ret = interleave_matrix_outer_dim_from_partitions(ret, pe) # create SIMD as innermost dimension and add a dummy outer dim ret = ret.reshape(1, pe, wmem, simd) @@ -138,19 +134,19 @@ def get_hw_compatible_weight_tensor(self, orig_weight_matrix): return ret def global_includes(self): - # self.code_gen_dict["$GLOBALS$"] = ['#include "weights.hpp"'] - # self.code_gen_dict["$GLOBALS$"] += ['#include "activations.hpp"'] - self.code_gen_dict["$GLOBALS$"] = ['#include "deconv.hpp"'] def defines(self, var): ifm_dim = self.get_nodeattr("IFMDim") self.code_gen_dict["$DEFINES$"] = [ - """#define Kernel {}\n #define Stride {}\n - #define IFMH {}\n #define IFMW {}\n #define ICH {}\n - #define OCH {}\n #define SIMD1 {}\n #define PE1 {}""".format( + """constexpr unsigned Kernel = {};\n constexpr unsigned Stride = {};\n + constexpr unsigned Padding = {};\n constexpr unsigned IFMH = {};\n + constexpr unsigned IFMW = {};\n constexpr unsigned ICH = {};\n + constexpr unsigned OCH = {};\n constexpr unsigned SIMD1 = {};\n + constexpr unsigned PE1 = {};""".format( self.get_nodeattr("KernelDim")[0], self.get_nodeattr("Stride")[0], + self.get_nodeattr("Padding")[0], ifm_dim[0], ifm_dim[1], self.get_nodeattr("IFMChannels"), @@ -170,7 +166,7 @@ def read_npy_data(self): self.code_gen_dict["$READNPYDATA$"] = [] # note: the innermost dim is reversed for the input self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintvectorstream<%s, %s, %d>("%s", in0_%s, false);' + 'npy2vectorstream<%s, %s, %d>("%s", in0_%s, false);' % ( elem_hls_type, npy_type, @@ -198,13 +194,27 @@ def strm_decl(self): ) def docompute(self): + odtype = self.get_output_datatype() + pe = self.get_nodeattr("PE") + ishape = self.get_normal_input_shape() self.code_gen_dict["$DOCOMPUTE$"] = [ - """deconv + "hls::stream> strm;".format(odtype.get_hls_datatype_str(), pe) + ] + self.code_gen_dict["$DOCOMPUTE$"].append("unsigned timeout = 0;") + self.code_gen_dict["$DOCOMPUTE$"].append("while(timeout < %s) {" % np.prod(ishape)) + self.code_gen_dict["$DOCOMPUTE$"].append( + """deconv (weights, in0_{}, out_{});""".format( self.hls_sname(), self.hls_sname(), ) - ] + ) + self.code_gen_dict["$DOCOMPUTE$"].append("if(out_V.empty()) timeout++;") + self.code_gen_dict["$DOCOMPUTE$"].append("else {") + self.code_gen_dict["$DOCOMPUTE$"].append("strm << out_V.read();") + self.code_gen_dict["$DOCOMPUTE$"].append("timeout = 0;") + self.code_gen_dict["$DOCOMPUTE$"].append("}") + self.code_gen_dict["$DOCOMPUTE$"].append("}") def dataoutstrm(self): code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") @@ -218,20 +228,16 @@ def dataoutstrm(self): # note: the innermost dim is not reversed for the output self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintvectorstream2npy<%s, %s, %d>(out_%s, %s, "%s", false);' + 'vectorstream2npy<%s, %s, %d>(strm, %s, "%s", false);' % ( elem_hls_type, npy_type, pe, - self.hls_sname(), shape_cpp_str, npy_out, ) ] - def save_as_npy(self): - self.code_gen_dict["$SAVEASCNPY$"] = [] - def blackboxfunction(self): self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ """void {}(hls::stream> &in0_{}, diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 3d89a0ab23..72c607731a 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -32,6 +32,7 @@ #define AP_INT_MAX_W $AP_INT_MAX_W$ #include "cnpy.h" #include "npy2apintstream.hpp" +#include "npy2vectorstream.hpp" #include #include "bnn-library.h" diff --git a/src/finn/util/data_packing.py b/src/finn/util/data_packing.py index f23f409f39..6133316fc1 100644 --- a/src/finn/util/data_packing.py +++ b/src/finn/util/data_packing.py @@ -322,9 +322,9 @@ def elem2str(x): else: raise Exception("Unsupported type for numpy_to_hls_code") - strarr = np.array2string(ndarray, separator=", ", formatter={"all": elem2str}) + strarr = np.array2string(ndarray.flatten(), separator=", ", formatter={"all": elem2str}) np.set_printoptions(**orig_printops) - strarr = strarr.replace("[", "{").replace("]", "}") + strarr = strarr.replace("[", "{").replace("]", ",}") if no_decl: ret = strarr + ";" else: From 629a65e79ebcd470ef51bf3696a297854a949661 Mon Sep 17 00:00:00 2001 From: Hugo LE BLEVEC Date: Tue, 15 Oct 2024 10:01:55 +0200 Subject: [PATCH 03/16] update finn-hlslib commit hash --- fetch-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index 5c54e8ecf7..66e1add27f 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -33,7 +33,7 @@ FINN_EXP_COMMIT="0724be21111a21f0d81a072fccc1c446e053f851" BREVITAS_COMMIT="84f42259ec869eb151af4cb8a8b23ad925f493db" PYVERILATOR_COMMIT="ce0a08c20cb8c1d1e84181d6f392390f846adbd1" CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4" -HLSLIB_COMMIT="d56b1d0c1eeb844a873fb29a29240a86e00d9f80" +HLSLIB_COMMIT="be554a3c4b47e6c3082f6158c057098d926f0d58" OMX_COMMIT="0b59762f9e4c4f7e5aa535ee9bc29f292434ca7a" AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b" XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" From b8501c9e460ae4796723ec5c19c17f79796a3342 Mon Sep 17 00:00:00 2001 From: Hugo LE BLEVEC Date: Tue, 15 Oct 2024 10:03:48 +0200 Subject: [PATCH 04/16] deconv test now passing multiple channels --- .../fpgadataflow/hls/deconvolution_hls.py | 14 ++-- .../fpgadataflow/test_fpgadataflow_deconv.py | 77 ++++++++++--------- 2 files changed, 48 insertions(+), 43 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py index 8931d818d4..7442dca99d 100644 --- a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py @@ -116,17 +116,19 @@ def get_hw_compatible_weight_tensor(self, orig_weight_matrix): pe = self.get_nodeattr("PE") simd = self.get_nodeattr("SIMD") wmem = self.calc_wmem() - assert orig_weight_matrix.shape == ( - ofm_ch, - k_h * k_w * ifm_ch, - ), """Weights matrix doesn't - #have expected shape (k_h*k_w*ifm_ch, ofm_ch)""" + # assert orig_weight_matrix.shape == ( + # k_h * k_w * ifm_ch, + # ofm_ch, + # ), """Weights matrix doesn't + # have expected shape (k_h*k_w*ifm_ch, ofm_ch)""" assert ofm_ch % pe == 0, "Requirement output channels divisable by PE is violated." assert ifm_ch % simd == 0, "Requirement input channels divisable by SIMD is violated." # interleave rows between PEs and reshape # distribute rows between PEs ret = orig_weight_matrix - ret = interleave_matrix_outer_dim_from_partitions(ret, pe) + ret = ret.flatten() + # breakpoint() + # ret = interleave_matrix_outer_dim_from_partitions(ret, pe) # create SIMD as innermost dimension and add a dummy outer dim ret = ret.reshape(1, pe, wmem, simd) # reverse the SIMD dimension diff --git a/tests/fpgadataflow/test_fpgadataflow_deconv.py b/tests/fpgadataflow/test_fpgadataflow_deconv.py index a09e788570..b642abc9a8 100644 --- a/tests/fpgadataflow/test_fpgadataflow_deconv.py +++ b/tests/fpgadataflow/test_fpgadataflow_deconv.py @@ -64,12 +64,11 @@ target_clk_ns = 10 -def set_up_reference_model(idt, wdt, k, idim, ifm_ch, ofm_ch, stride, padding): +def set_up_reference_model(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding): idim_h, idim_w = idim stride_h, stride_w = stride odim_h = (idim_h - 1) * stride_h - 2 * padding + (k - 1) + 1 odim_w = (idim_w - 1) * stride_w - 2 * padding + (k - 1) + 1 - odt = DataType["INT32"] inp = helper.make_tensor_value_info( "inp", @@ -120,10 +119,10 @@ def set_up_reference_model(idt, wdt, k, idim, ifm_ch, ofm_ch, stride, padding): model = model.transform(InferShapes()) - return model + return model, w_tensor -def create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding): +def create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding, w_tensor): idim_h, idim_w = idim stride_h, stride_w = stride odim_h = (idim_h - 1) * stride_h - 2 * padding + (k - 1) + 1 @@ -141,7 +140,8 @@ def create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding): ) outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, odim_h, odim_w, ofm_ch]) - W = helper.make_tensor_value_info("W", TensorProto.FLOAT, [ifm_ch * k * k, ofm_ch]) + # W = helper.make_tensor_value_info("W", TensorProto.FLOAT, [ifm_ch * k * k, ofm_ch]) + W = helper.make_tensor_value_info("W", TensorProto.FLOAT, [ofm_ch, k, k, ifm_ch]) Deconv = helper.make_node( "Deconvolution_hls", @@ -154,6 +154,7 @@ def create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding): OFMChannels=ofm_ch, IFMDim=idim, Stride=[stride_h, stride_w], + Padding = [padding, padding], PE=1, SIMD=1, inputDataType=idt.name, @@ -180,7 +181,9 @@ def create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding): model.set_tensor_datatype(model.graph.output[0].name, odt) model.set_tensor_datatype("W", wdt) - w_tensor = gen_finn_dt_tensor(wdt, [ifm_ch * k * k, ofm_ch]) + # w_tensor = gen_finn_dt_tensor(wdt, [ifm_ch * k * k, ofm_ch]) + # w_tensor = w_tensor.reshape(ifm_ch * k * k, ofm_ch) + w_tensor = w_tensor.transpose(1, 2, 3, 0) model.set_initializer("W", w_tensor) model = model.transform(InferShapes()) @@ -189,33 +192,35 @@ def create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding): # input image dimension -@pytest.mark.parametrize("idim", [[8, 8], [10, 8]]) +@pytest.mark.parametrize("idim", [[8, 8]]) # number of rows and number of cols to add -@pytest.mark.parametrize("stride", [[2, 2], [2, 3]]) +@pytest.mark.parametrize("stride", [[2, 2]]) # number of channels -@pytest.mark.parametrize("ifm_ch", [2]) +@pytest.mark.parametrize("ifm_ch", [4]) # number of channels -@pytest.mark.parametrize("ofm_ch", [4]) +@pytest.mark.parametrize("ofm_ch", [6]) # Input parallelism -@pytest.mark.parametrize("simd", [1, 2]) +@pytest.mark.parametrize("simd", [1]) # PE -@pytest.mark.parametrize("pe", [1, 2]) +@pytest.mark.parametrize("pe", [1]) # kernel size @pytest.mark.parametrize("k", [2]) # padding -@pytest.mark.parametrize("padding", [0, 1]) +@pytest.mark.parametrize("padding", [0]) # exec mode -@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.parametrize("exec_mode", ["cppsim"]) @pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado def test_fpgadataflow_deconv(idim, stride, ifm_ch, ofm_ch, simd, pe, k, padding, exec_mode): - idt = wdt = DataType["INT4"] + idt = wdt = DataType["INT8"] wdt = idt + odt = DataType["INT32"] idim_h, idim_w = idim stride_h, stride_w = stride - ref_model = set_up_reference_model(idt, wdt, k, idim, ifm_ch, ofm_ch, stride, padding) + ref_model, w_tensor = set_up_reference_model(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding) + model = create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding, w_tensor) odim_h = (idim_h - 1) * stride_h - 2 * padding + (k - 1) + 1 odim_w = (idim_w - 1) * stride_w - 2 * padding + (k - 1) + 1 @@ -225,34 +230,31 @@ def test_fpgadataflow_deconv(idim, stride, ifm_ch, ofm_ch, simd, pe, k, padding, y_expected = oxe.execute_onnx(ref_model, input_dict)["outp"] - model = ref_model.transform(InferPixelPaddingDeconv()) - model = model.transform(InferConvInpGen()) - model = model.transform(InferQuantizedMatrixVectorActivation()) - model = model.transform(InferShapes()) - model = model.transform(GiveUniqueNodeNames()) - - y_produced = oxe.execute_onnx(model, input_dict)["outp"] - assert (y_produced == y_expected).all() + # model = model.transform(InferShapes()) + # model = model.transform(GiveUniqueNodeNames()) + input_tensor_nhwc = input_tensor.transpose(0, 2, 3, 1) + input_dict_nhwc = {"inp": input_tensor_nhwc} + # y_produced = oxe.execute_onnx(model, input_dict_nhwc)["outp"] + # assert (y_produced == y_expected).all() - model = model.transform(SpecializeLayers(test_fpga_part)) - model = model.transform(MinimizeAccumulatorWidth()) + # model = model.transform(SpecializeLayers(test_fpga_part)) + # model = model.transform(MinimizeAccumulatorWidth()) for n in model.graph.node: - if n.op_type.startswith("ConvolutionInputGenerator"): - convinputgen_node = getCustomOp(n) - convinputgen_node.set_nodeattr("SIMD", simd) - elif n.op_type.startswith("MVAU"): - mvau_node = getCustomOp(n) - mvau_node.set_nodeattr("PE", pe) - mvau_node.set_nodeattr("SIMD", simd) - - expected_oshape = (1, ofm_ch, odim_h, odim_w) + if n.op_type.startswith("Deconvolution_hls"): + deconv_node = getCustomOp(n) + deconv_node.set_nodeattr("PE", pe) + deconv_node.set_nodeattr("SIMD", simd) + expected_oshape = (1, odim_h, odim_w, ofm_ch) + # model.save("deconv.onnx") # cppsim if exec_mode == "cppsim": + model = model.transform(GiveUniqueNodeNames()) model = model.transform(PrepareCppSim()) model = model.transform(CompileCppSim()) model = model.transform(SetExecMode("cppsim")) + # breakpoint() # rtlsim else: @@ -262,12 +264,13 @@ def test_fpgadataflow_deconv(idim, stride, ifm_ch, ofm_ch, simd, pe, k, padding, model = model.transform(PrepareRTLSim()) model = model.transform(SetExecMode("rtlsim")) - y_produced = oxe.execute_onnx(model, input_dict)["outp"] + y_produced = oxe.execute_onnx(model, input_dict_nhwc)["outp"] assert y_produced.shape == expected_oshape + y_produced = y_produced.transpose(0, 3, 1, 2) assert (y_produced == y_expected).all() if exec_mode == "rtlsim": - node = model.get_nodes_by_op_type("FMPadding_Pixel_hls")[0] + node = model.get_nodes_by_op_type("Deconvolution_hls")[0] inst = getCustomOp(node) cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") exp_cycles_dict = model.analysis(exp_cycles_per_layer) From c5aa42766eb7b8391c51994c7b1fd43873847139 Mon Sep 17 00:00:00 2001 From: hlebleve Date: Wed, 6 Nov 2024 10:51:19 +0000 Subject: [PATCH 05/16] [Deconv] Updating tests and custom HLS node, passing most cppsim tests --- .../fpgadataflow/hls/deconvolution_hls.py | 24 +++++++++---------- .../fpgadataflow/test_fpgadataflow_deconv.py | 23 ++++-------------- 2 files changed, 15 insertions(+), 32 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py index 7442dca99d..6f0f8cbcff 100644 --- a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py @@ -89,7 +89,7 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name): weight_tensor = self.get_hw_compatible_weight_tensor(weights) export_wdt = self.get_weight_datatype() if weight_file_mode == "hls_header": - weight_hls_code = numpy_to_hls_code1(weight_tensor, export_wdt, "weights", True, True) + weight_hls_code = numpy_to_hls_code1(weight_tensor, export_wdt, "weights", False, True) # write weights into C++ header file as dictated by finn-hlslib f_weights = open(weight_file_name, "w") f_weights.write( @@ -116,23 +116,20 @@ def get_hw_compatible_weight_tensor(self, orig_weight_matrix): pe = self.get_nodeattr("PE") simd = self.get_nodeattr("SIMD") wmem = self.calc_wmem() - # assert orig_weight_matrix.shape == ( - # k_h * k_w * ifm_ch, - # ofm_ch, - # ), """Weights matrix doesn't - # have expected shape (k_h*k_w*ifm_ch, ofm_ch)""" + assert orig_weight_matrix.shape == ( + ofm_ch, k_h, k_w, ifm_ch + ), """Weights matrix doesn't + have expected shape (ofm_ch, k_h, k_w, ifm_ch)""" assert ofm_ch % pe == 0, "Requirement output channels divisable by PE is violated." assert ifm_ch % simd == 0, "Requirement input channels divisable by SIMD is violated." # interleave rows between PEs and reshape # distribute rows between PEs ret = orig_weight_matrix - ret = ret.flatten() - # breakpoint() - # ret = interleave_matrix_outer_dim_from_partitions(ret, pe) - # create SIMD as innermost dimension and add a dummy outer dim + ret = ret.reshape(ofm_ch, k_h * k_w * ifm_ch) + ret = interleave_matrix_outer_dim_from_partitions(ret, pe) + # create SIMD as innermost dimension ret = ret.reshape(1, pe, wmem, simd) - # reverse the SIMD dimension - ret = np.flip(ret, axis=-1) + ret = ret.transpose(0, 2, 1, 3) return ret def global_includes(self): @@ -199,11 +196,12 @@ def docompute(self): odtype = self.get_output_datatype() pe = self.get_nodeattr("PE") ishape = self.get_normal_input_shape() + oshape = self.get_normal_output_shape() self.code_gen_dict["$DOCOMPUTE$"] = [ "hls::stream> strm;".format(odtype.get_hls_datatype_str(), pe) ] self.code_gen_dict["$DOCOMPUTE$"].append("unsigned timeout = 0;") - self.code_gen_dict["$DOCOMPUTE$"].append("while(timeout < %s) {" % np.prod(ishape)) + self.code_gen_dict["$DOCOMPUTE$"].append("while(timeout < %s) {" % (2 * np.prod(oshape))) self.code_gen_dict["$DOCOMPUTE$"].append( """deconv (weights, in0_{}, out_{});""".format( diff --git a/tests/fpgadataflow/test_fpgadataflow_deconv.py b/tests/fpgadataflow/test_fpgadataflow_deconv.py index b642abc9a8..74224abeb5 100644 --- a/tests/fpgadataflow/test_fpgadataflow_deconv.py +++ b/tests/fpgadataflow/test_fpgadataflow_deconv.py @@ -41,22 +41,11 @@ import finn.core.onnx_exec as oxe from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim -from finn.transformation.fpgadataflow.convert_to_hw_layers import ( - InferConvInpGen, - InferQuantizedMatrixVectorActivation, -) from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP -from finn.transformation.fpgadataflow.infer_pixel_padding_deconv import ( - InferPixelPaddingDeconv, -) -from finn.transformation.fpgadataflow.minimize_accumulator_width import ( - MinimizeAccumulatorWidth, -) from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim from finn.transformation.fpgadataflow.prepare_ip import PrepareIP from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode -from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers from finn.util.basic import pynq_part_map test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") @@ -139,8 +128,6 @@ def create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding, ], ) outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, odim_h, odim_w, ofm_ch]) - - # W = helper.make_tensor_value_info("W", TensorProto.FLOAT, [ifm_ch * k * k, ofm_ch]) W = helper.make_tensor_value_info("W", TensorProto.FLOAT, [ofm_ch, k, k, ifm_ch]) Deconv = helper.make_node( @@ -181,8 +168,6 @@ def create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding, model.set_tensor_datatype(model.graph.output[0].name, odt) model.set_tensor_datatype("W", wdt) - # w_tensor = gen_finn_dt_tensor(wdt, [ifm_ch * k * k, ofm_ch]) - # w_tensor = w_tensor.reshape(ifm_ch * k * k, ofm_ch) w_tensor = w_tensor.transpose(1, 2, 3, 0) model.set_initializer("W", w_tensor) @@ -200,13 +185,13 @@ def create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding, # number of channels @pytest.mark.parametrize("ofm_ch", [6]) # Input parallelism -@pytest.mark.parametrize("simd", [1]) +@pytest.mark.parametrize("simd", [1,2,4]) # PE -@pytest.mark.parametrize("pe", [1]) +@pytest.mark.parametrize("pe", [1,3,6]) # kernel size -@pytest.mark.parametrize("k", [2]) +@pytest.mark.parametrize("k", [2,4]) # padding -@pytest.mark.parametrize("padding", [0]) +@pytest.mark.parametrize("padding", [0,1,2]) # exec mode @pytest.mark.parametrize("exec_mode", ["cppsim"]) @pytest.mark.fpgadataflow From f7cbd48b6c5e130d40c089ee55c64df75aadad15 Mon Sep 17 00:00:00 2001 From: Hugo LE BLEVEC Date: Wed, 6 Nov 2024 12:07:15 +0100 Subject: [PATCH 06/16] [Deconv] Updating tests and custom HLS node, passing most cppsim tests --- .../fpgadataflow/hls/deconvolution_hls.py | 5 ++++- tests/fpgadataflow/test_fpgadataflow_deconv.py | 14 ++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py index 6f0f8cbcff..a3dca69ca9 100644 --- a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py @@ -117,7 +117,10 @@ def get_hw_compatible_weight_tensor(self, orig_weight_matrix): simd = self.get_nodeattr("SIMD") wmem = self.calc_wmem() assert orig_weight_matrix.shape == ( - ofm_ch, k_h, k_w, ifm_ch + ofm_ch, + k_h, + k_w, + ifm_ch, ), """Weights matrix doesn't have expected shape (ofm_ch, k_h, k_w, ifm_ch)""" assert ofm_ch % pe == 0, "Requirement output channels divisable by PE is violated." diff --git a/tests/fpgadataflow/test_fpgadataflow_deconv.py b/tests/fpgadataflow/test_fpgadataflow_deconv.py index 74224abeb5..7a5da68240 100644 --- a/tests/fpgadataflow/test_fpgadataflow_deconv.py +++ b/tests/fpgadataflow/test_fpgadataflow_deconv.py @@ -141,7 +141,7 @@ def create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding, OFMChannels=ofm_ch, IFMDim=idim, Stride=[stride_h, stride_w], - Padding = [padding, padding], + Padding=[padding, padding], PE=1, SIMD=1, inputDataType=idt.name, @@ -185,13 +185,13 @@ def create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding, # number of channels @pytest.mark.parametrize("ofm_ch", [6]) # Input parallelism -@pytest.mark.parametrize("simd", [1,2,4]) +@pytest.mark.parametrize("simd", [1, 2, 4]) # PE -@pytest.mark.parametrize("pe", [1,3,6]) +@pytest.mark.parametrize("pe", [1, 3, 6]) # kernel size -@pytest.mark.parametrize("k", [2,4]) +@pytest.mark.parametrize("k", [2, 4]) # padding -@pytest.mark.parametrize("padding", [0,1,2]) +@pytest.mark.parametrize("padding", [0, 1, 2]) # exec mode @pytest.mark.parametrize("exec_mode", ["cppsim"]) @pytest.mark.fpgadataflow @@ -204,7 +204,9 @@ def test_fpgadataflow_deconv(idim, stride, ifm_ch, ofm_ch, simd, pe, k, padding, idim_h, idim_w = idim stride_h, stride_w = stride - ref_model, w_tensor = set_up_reference_model(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding) + ref_model, w_tensor = set_up_reference_model( + idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding + ) model = create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding, w_tensor) odim_h = (idim_h - 1) * stride_h - 2 * padding + (k - 1) + 1 From 891a305ff371ed3d976bcd285f061f8d5e17ade9 Mon Sep 17 00:00:00 2001 From: Hugo LE BLEVEC Date: Wed, 6 Nov 2024 12:08:15 +0100 Subject: [PATCH 07/16] [Deconv] Updating tests and custom HLS node, passing most cppsim tests --- src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py index a3dca69ca9..48c0e63453 100644 --- a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py @@ -198,7 +198,7 @@ def strm_decl(self): def docompute(self): odtype = self.get_output_datatype() pe = self.get_nodeattr("PE") - ishape = self.get_normal_input_shape() + # ishape = self.get_normal_input_shape() oshape = self.get_normal_output_shape() self.code_gen_dict["$DOCOMPUTE$"] = [ "hls::stream> strm;".format(odtype.get_hls_datatype_str(), pe) From c157dc0c694a80ef968843fc2381e014f59eaa53 Mon Sep 17 00:00:00 2001 From: Hugo LE BLEVEC Date: Thu, 16 Jan 2025 14:03:53 +0100 Subject: [PATCH 08/16] Changing the computation of the timout value to be based on parameters --- .../fpgadataflow/hls/deconvolution_hls.py | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py index 48c0e63453..f43fb0dbbd 100644 --- a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py @@ -198,13 +198,27 @@ def strm_decl(self): def docompute(self): odtype = self.get_output_datatype() pe = self.get_nodeattr("PE") - # ishape = self.get_normal_input_shape() - oshape = self.get_normal_output_shape() + simd = self.get_nodeattr("SIMD") + i_ch = self.get_nodeattr("IFMChannels") + k_h, k_w = self.get_nodeattr("KernelDim") + s_h, s_w = self.get_nodeattr("Stride") + i_h, i_w = self.get_nodeattr("IFMDim") + p_h, p_w = self.get_nodeattr("Padding") + if p_w >= k_w - s_w: + padup = 0 + else: + padup = (k_w - p_w - 1) / s_w + crop = s_w * padup - ((k_w - s_w) - p_w) + sf = i_ch / simd + w_eff = padup + i_w + padup + wo_eff = (w_eff - 1) * s_w + k_w self.code_gen_dict["$DOCOMPUTE$"] = [ "hls::stream> strm;".format(odtype.get_hls_datatype_str(), pe) ] self.code_gen_dict["$DOCOMPUTE$"].append("unsigned timeout = 0;") - self.code_gen_dict["$DOCOMPUTE$"].append("while(timeout < %s) {" % (2 * np.prod(oshape))) + self.code_gen_dict["$DOCOMPUTE$"].append( + "while(timeout < %s) {" % (wo_eff * (crop + 1) * ((k_w / s_w) ** 2) * sf + 50) + ) self.code_gen_dict["$DOCOMPUTE$"].append( """deconv (weights, in0_{}, out_{});""".format( From cee4e70a05941d573937f829faa60d148c12b219 Mon Sep 17 00:00:00 2001 From: Hugo LE BLEVEC Date: Thu, 16 Jan 2025 14:31:57 +0100 Subject: [PATCH 09/16] Increasing the timeout value as it fails some test configurations --- src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py index f43fb0dbbd..f1e7d7aca8 100644 --- a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py @@ -217,7 +217,7 @@ def docompute(self): ] self.code_gen_dict["$DOCOMPUTE$"].append("unsigned timeout = 0;") self.code_gen_dict["$DOCOMPUTE$"].append( - "while(timeout < %s) {" % (wo_eff * (crop + 1) * ((k_w / s_w) ** 2) * sf + 50) + "while(timeout < %s) {" % (wo_eff * (crop + 1) * ((k_w / s_w) ** 2) * 2 * sf + 50) ) self.code_gen_dict["$DOCOMPUTE$"].append( """deconv From 248848ef95c9c69a1aa133283fbcde5a71e738a6 Mon Sep 17 00:00:00 2001 From: Hugo LE BLEVEC Date: Thu, 16 Jan 2025 15:14:40 +0100 Subject: [PATCH 10/16] Updating HLSLIB commit hash to the most recent --- fetch-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index 9ff546bdbb..57e14bc291 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -33,7 +33,7 @@ FINN_EXP_COMMIT="0724be21111a21f0d81a072fccc1c446e053f851" BREVITAS_COMMIT="d4834bd2a0fad3c1fbc0ff7e1346d5dcb3797ea4" PYVERILATOR_COMMIT="ce0a08c20cb8c1d1e84181d6f392390f846adbd1" CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4" -HLSLIB_COMMIT="be554a3c4b47e6c3082f6158c057098d926f0d58" +HLSLIB_COMMIT="16cfc4b3ab895babf30f7db7c4bcac27d68317a9" OMX_COMMIT="0b59762f9e4c4f7e5aa535ee9bc29f292434ca7a" AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b" XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" From 91e811615017cd7b0992d82be815045003530cf8 Mon Sep 17 00:00:00 2001 From: Hugo LE BLEVEC Date: Thu, 16 Jan 2025 15:15:17 +0100 Subject: [PATCH 11/16] Setting test parameters to failing case --- tests/fpgadataflow/test_fpgadataflow_deconv.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_deconv.py b/tests/fpgadataflow/test_fpgadataflow_deconv.py index 7a5da68240..26509bc738 100644 --- a/tests/fpgadataflow/test_fpgadataflow_deconv.py +++ b/tests/fpgadataflow/test_fpgadataflow_deconv.py @@ -181,17 +181,17 @@ def create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding, # number of rows and number of cols to add @pytest.mark.parametrize("stride", [[2, 2]]) # number of channels -@pytest.mark.parametrize("ifm_ch", [4]) +@pytest.mark.parametrize("ifm_ch", [2]) # number of channels -@pytest.mark.parametrize("ofm_ch", [6]) +@pytest.mark.parametrize("ofm_ch", [3]) # Input parallelism -@pytest.mark.parametrize("simd", [1, 2, 4]) +@pytest.mark.parametrize("simd", [1]) # PE -@pytest.mark.parametrize("pe", [1, 3, 6]) +@pytest.mark.parametrize("pe", [1]) # kernel size -@pytest.mark.parametrize("k", [2, 4]) +@pytest.mark.parametrize("k", [4]) # padding -@pytest.mark.parametrize("padding", [0, 1, 2]) +@pytest.mark.parametrize("padding", [1]) # exec mode @pytest.mark.parametrize("exec_mode", ["cppsim"]) @pytest.mark.fpgadataflow From 05f9a7066b96f41bac083952c6b8700afc6164bd Mon Sep 17 00:00:00 2001 From: Hugo LE BLEVEC Date: Wed, 7 May 2025 15:45:17 +0200 Subject: [PATCH 12/16] updating templates to match recent changes --- .../fpgadataflow/hls/deconvolution_hls.py | 117 ++++++++++++------ src/finn/custom_op/fpgadataflow/hlsbackend.py | 14 +++ src/finn/custom_op/fpgadataflow/templates.py | 45 +++++++ 3 files changed, 135 insertions(+), 41 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py index f1e7d7aca8..f30f91fad4 100644 --- a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py @@ -31,6 +31,7 @@ from qonnx.core.datatype import DataType from qonnx.util.basic import interleave_matrix_outer_dim_from_partitions +from finn.custom_op.fpgadataflow import templates from finn.custom_op.fpgadataflow.deconvolution import Deconvolution from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend from finn.util.data_packing import ( @@ -195,30 +196,14 @@ def strm_decl(self): ) ) - def docompute(self): - odtype = self.get_output_datatype() - pe = self.get_nodeattr("PE") - simd = self.get_nodeattr("SIMD") - i_ch = self.get_nodeattr("IFMChannels") - k_h, k_w = self.get_nodeattr("KernelDim") - s_h, s_w = self.get_nodeattr("Stride") - i_h, i_w = self.get_nodeattr("IFMDim") - p_h, p_w = self.get_nodeattr("Padding") - if p_w >= k_w - s_w: - padup = 0 - else: - padup = (k_w - p_w - 1) / s_w - crop = s_w * padup - ((k_w - s_w) - p_w) - sf = i_ch / simd - w_eff = padup + i_w + padup - wo_eff = (w_eff - 1) * s_w + k_w - self.code_gen_dict["$DOCOMPUTE$"] = [ - "hls::stream> strm;".format(odtype.get_hls_datatype_str(), pe) - ] - self.code_gen_dict["$DOCOMPUTE$"].append("unsigned timeout = 0;") - self.code_gen_dict["$DOCOMPUTE$"].append( - "while(timeout < %s) {" % (wo_eff * (crop + 1) * ((k_w / s_w) ** 2) * 2 * sf + 50) + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream> debug_out_{} ("out_{}");'.format( + odtype.get_hls_datatype_str(), pe, self.hls_sname(), self.hls_sname() + ) ) + + def docompute(self): + self.code_gen_dict["$DOCOMPUTE$"] = [] self.code_gen_dict["$DOCOMPUTE$"].append( """deconv (weights, in0_{}, out_{});""".format( @@ -226,12 +211,6 @@ def docompute(self): self.hls_sname(), ) ) - self.code_gen_dict["$DOCOMPUTE$"].append("if(out_V.empty()) timeout++;") - self.code_gen_dict["$DOCOMPUTE$"].append("else {") - self.code_gen_dict["$DOCOMPUTE$"].append("strm << out_V.read();") - self.code_gen_dict["$DOCOMPUTE$"].append("timeout = 0;") - self.code_gen_dict["$DOCOMPUTE$"].append("}") - self.code_gen_dict["$DOCOMPUTE$"].append("}") def dataoutstrm(self): code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") @@ -245,28 +224,34 @@ def dataoutstrm(self): # note: the innermost dim is not reversed for the output self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'vectorstream2npy<%s, %s, %d>(strm, %s, "%s", false);' + 'vectorstream2npy<%s, %s, %d>(debug_out_%s, %s, "%s", false);' % ( elem_hls_type, npy_type, pe, + self.hls_sname(), shape_cpp_str, npy_out, ) ] def blackboxfunction(self): - self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """void {}(hls::stream> &in0_{}, - hls::stream> &out_{} - )""".format( - self.onnx_node.name, - self.get_instream_width(), - self.hls_sname(), - self.get_outstream_width(), - self.hls_sname(), - ) - ] + input_elem_hls_type = self.get_input_datatype().get_hls_datatype_str() + output_elem_hls_type = self.get_output_datatype().get_hls_datatype_str() + simd = self.get_nodeattr("SIMD") + pe = self.get_nodeattr("PE") + in_stream = "hls::stream> &in0_%s" % ( + input_elem_hls_type, + simd, + self.hls_sname(), + ) + out_stream = "hls::stream> &out_%s" % ( + output_elem_hls_type, + pe, + self.hls_sname(), + ) + blackbox_hls = "void %s(%s, %s)" % (self.onnx_node.name, in_stream, out_stream) + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [blackbox_hls] def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = [ @@ -369,3 +354,53 @@ def execute_node(self, context, graph): mode ) ) + + def code_generation_cppsim(self, model): + """Generates c++ code for simulation (cppsim).""" + node = self.onnx_node + path = self.get_nodeattr("code_gen_dir_cppsim") + self.code_gen_dict["$AP_INT_MAX_W$"] = [str(self.get_ap_int_max_w())] + self.generate_params(model, path) + self.global_includes() + self.defines("cppsim") + self.read_npy_data() + self.strm_decl() + self.pragmas() + self.docompute() + self.dataoutstrm() + self.save_as_npy() + self.timeout_value() + self.timeout_condition() + self.timeout_read_stream() + + template = templates.docompute_template_timeout + + for key in self.code_gen_dict: + # transform list into long string separated by '\n' + code_gen_line = "\n".join(self.code_gen_dict[key]) + template = template.replace(key, code_gen_line) + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + f = open(os.path.join(code_gen_dir, "execute_{}.cpp".format(node.op_type)), "w") + f.write(template) + f.close() + self.code_gen_dict.clear() + + def timeout_value(self): + """Set timeout value for HLS functions defined for one clock cycle""" + simd = self.get_nodeattr("SIMD") + i_ch = self.get_nodeattr("IFMChannels") + k_h, k_w = self.get_nodeattr("KernelDim") + s_h, s_w = self.get_nodeattr("Stride") + i_h, i_w = self.get_nodeattr("IFMDim") + p_h, p_w = self.get_nodeattr("Padding") + if p_w >= k_w - s_w: + padup = 0 + else: + padup = (k_w - p_w - 1) / s_w + crop = s_w * padup - ((k_w - s_w) - p_w) + sf = i_ch / simd + w_eff = padup + i_w + padup + wo_eff = (w_eff - 1) * s_w + k_w + self.code_gen_dict["$TIMEOUT_VALUE$"] = [ + "%s" % (wo_eff * (crop + 1) * ((k_w / s_w) ** 2) * 4 * sf + 50) + ] diff --git a/src/finn/custom_op/fpgadataflow/hlsbackend.py b/src/finn/custom_op/fpgadataflow/hlsbackend.py index d8210fd684..c03a9029db 100644 --- a/src/finn/custom_op/fpgadataflow/hlsbackend.py +++ b/src/finn/custom_op/fpgadataflow/hlsbackend.py @@ -474,3 +474,17 @@ def get_ap_int_max_w(self): ret = max([instream, outstream]) assert ret <= 8191, "AP_INT_MAX_W=%d is larger than allowed maximum of 8191" % ret return ret + + def timeout_value(self): + """Set timeout value for HLS functions defined for one clock cycle""" + self.code_gen_dict["$TIMEOUT_VALUE$"] = ["100"] + + def timeout_condition(self): + """Set timeout condition for HLS functions defined for one clock cycle""" + self.code_gen_dict["$TIMEOUT_CONDITION$"] = ["out_{}.empty()".format(self.hls_sname())] + + def timeout_read_stream(self): + """Set reading output stream procedure for HLS functions defined for one clock cycle""" + self.code_gen_dict["$TIMEOUT_READ_STREAM$"] = [ + "debug_out_{} << out_{}.read();".format(self.hls_sname(), self.hls_sname()) + ] diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 72c607731a..d2100a7516 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -59,6 +59,51 @@ """ +# template for single node execution with timeout (for single clock hls operations) +docompute_template_timeout = """ +#define AP_INT_MAX_W $AP_INT_MAX_W$ +#include "cnpy.h" +#include "npy2apintstream.hpp" +#include "npy2vectorstream.hpp" +#include +#include "bnn-library.h" + +// includes for network parameters +$GLOBALS$ + +// defines for network parameters +$DEFINES$ + +int main(){ +$PRAGMAS$ + +$STREAMDECLARATIONS$ + +$READNPYDATA$ + +unsigned timeout = 0; +while(timeout < $TIMEOUT_VALUE$){ + +$DOCOMPUTE$ + +if($TIMEOUT_CONDITION$){ +timeout++; +} + +else{ +$TIMEOUT_READ_STREAM$ +timeout = 0; +} +} + +$DATAOUTSTREAM$ + +$SAVEASCNPY$ + +} + +""" + # templates for single node ip generation # cpp file From 7deb2969112f5f0e9c19c35db55e5a8cbb3f38e9 Mon Sep 17 00:00:00 2001 From: Hugo LE BLEVEC Date: Wed, 7 May 2025 17:22:51 +0200 Subject: [PATCH 13/16] changing stream names to match the template --- src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py index f30f91fad4..eb2b800ee7 100644 --- a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py @@ -197,8 +197,8 @@ def strm_decl(self): ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream> debug_out_{} ("out_{}");'.format( - odtype.get_hls_datatype_str(), pe, self.hls_sname(), self.hls_sname() + 'hls::stream> strm ("strm");'.format( + odtype.get_hls_datatype_str(), pe ) ) @@ -224,12 +224,11 @@ def dataoutstrm(self): # note: the innermost dim is not reversed for the output self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'vectorstream2npy<%s, %s, %d>(debug_out_%s, %s, "%s", false);' + 'vectorstream2npy<%s, %s, %d>(strm, %s, "%s", false);' % ( elem_hls_type, npy_type, pe, - self.hls_sname(), shape_cpp_str, npy_out, ) From 12832025b969c31ebd0685e85062a405ab53305e Mon Sep 17 00:00:00 2001 From: auphelia Date: Tue, 12 Aug 2025 16:11:28 +0100 Subject: [PATCH 14/16] [Deconv] Align custom op with changes from dev --- .../custom_op/fpgadataflow/deconvolution.py | 46 ++-- .../fpgadataflow/hls/deconvolution_hls.py | 208 +----------------- .../fpgadataflow/test_fpgadataflow_deconv.py | 2 + 3 files changed, 35 insertions(+), 221 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/deconvolution.py b/src/finn/custom_op/fpgadataflow/deconvolution.py index 8dd33fea83..ad7a0bda1e 100644 --- a/src/finn/custom_op/fpgadataflow/deconvolution.py +++ b/src/finn/custom_op/fpgadataflow/deconvolution.py @@ -26,7 +26,6 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import numpy as np import warnings from qonnx.core.datatype import DataType @@ -58,18 +57,27 @@ def get_nodeattr_types(self): return my_attrs def get_normal_input_shape(self, ind=0): - ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim") - ifm_ch = self.get_nodeattr("IFMChannels") - ishape = (1, ifm_dim_h, ifm_dim_w, ifm_ch) + if ind == 0: + ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim") + ifm_ch = self.get_nodeattr("IFMChannels") + ishape = (1, ifm_dim_h, ifm_dim_w, ifm_ch) + else: + ifm_ch = self.get_nodeattr("IFMChannels") + ofm_ch = self.get_nodeattr("OFMChannels") + k_h, k_w = self.get_nodeattr("KernelDim") + ishape = (ofm_ch, k_h, k_w, ifm_ch) return ishape def get_folded_input_shape(self, ind=0): - ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim") - ifm_ch = self.get_nodeattr("IFMChannels") - simd = self.get_nodeattr("SIMD") - assert ifm_ch % simd == 0, "SIMD must divide IFMChannels" - fold = int(ifm_ch / simd) - folded_ishape = (1, ifm_dim_h, ifm_dim_w, fold, simd) + if ind == 0: + ifm_dim_h, ifm_dim_w = self.get_nodeattr("IFMDim") + ifm_ch = self.get_nodeattr("IFMChannels") + simd = self.get_nodeattr("SIMD") + assert ifm_ch % simd == 0, "SIMD must divide IFMChannels" + fold = int(ifm_ch / simd) + folded_ishape = (1, ifm_dim_h, ifm_dim_w, fold, simd) + else: + folded_ishape = self.get_normal_input_shape(ind) return folded_ishape def get_normal_output_shape(self, ind=0): @@ -134,11 +142,14 @@ def get_output_datatype(self, ind=0): def get_instream_width(self, ind=0): """Returns stream width, input and output stream width are equal for the sliding window function""" - ibits = self.get_input_datatype().bitwidth() - simd = self.get_nodeattr("SIMD") - ifm_ch = self.get_nodeattr("IFMChannels") - assert ifm_ch % simd == 0, "SIMD must divide IFMChannels" - in_width = simd * ibits + if ind == 0: + ibits = self.get_input_datatype().bitwidth() + simd = self.get_nodeattr("SIMD") + ifm_ch = self.get_nodeattr("IFMChannels") + assert ifm_ch % simd == 0, "SIMD must divide IFMChannels" + in_width = simd * ibits + else: + in_width = 0 return in_width def get_outstream_width(self, ind=0): @@ -146,11 +157,6 @@ def get_outstream_width(self, ind=0): out_width = o_bits * self.get_nodeattr("PE") return out_width - def get_number_output_values(self): - folded_oshape = self.get_folded_output_shape() - num_output_elems = np.prod(folded_oshape[:-1]) - return num_output_elems - def get_exp_cycles(self): return 0 diff --git a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py index eb2b800ee7..39c01fa8dc 100644 --- a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py @@ -27,18 +27,11 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import numpy as np -import os -from qonnx.core.datatype import DataType from qonnx.util.basic import interleave_matrix_outer_dim_from_partitions -from finn.custom_op.fpgadataflow import templates from finn.custom_op.fpgadataflow.deconvolution import Deconvolution from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend -from finn.util.data_packing import ( - npy_to_rtlsim_input, - numpy_to_hls_code1, - rtlsim_output_to_npy, -) +from finn.util.data_packing import numpy_to_hls_code1 class Deconvolution_hls(Deconvolution, HLSBackend): @@ -159,106 +152,32 @@ def defines(self, var): ) ] - def read_npy_data(self): - code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") - dtype = self.get_input_datatype() - elem_hls_type = dtype.get_hls_datatype_str() - simd = self.get_nodeattr("SIMD") - npy_type = "float" - npy_in = "%s/input_0.npy" % code_gen_dir - self.code_gen_dict["$READNPYDATA$"] = [] - # note: the innermost dim is reversed for the input - self.code_gen_dict["$READNPYDATA$"].append( - 'npy2vectorstream<%s, %s, %d>("%s", in0_%s, false);' - % ( - elem_hls_type, - npy_type, - simd, - npy_in, - self.hls_sname(), - ) - ) - - def strm_decl(self): - idtype = self.get_input_datatype() - odtype = self.get_output_datatype() - simd = self.get_nodeattr("SIMD") - pe = self.get_nodeattr("PE") - self.code_gen_dict["$STREAMDECLARATIONS$"] = [] - self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream> in0_{} ("in0_{}");'.format( - idtype.get_hls_datatype_str(), simd, self.hls_sname(), self.hls_sname() - ) - ) - self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream> out_{} ("out_{}");'.format( - odtype.get_hls_datatype_str(), pe, self.hls_sname(), self.hls_sname() - ) - ) - - self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream> strm ("strm");'.format( - odtype.get_hls_datatype_str(), pe - ) - ) - def docompute(self): self.code_gen_dict["$DOCOMPUTE$"] = [] self.code_gen_dict["$DOCOMPUTE$"].append( """deconv - (weights, in0_{}, out_{});""".format( - self.hls_sname(), - self.hls_sname(), - ) + (weights, in0_V, out0_V);""" ) - def dataoutstrm(self): - code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") - pe = self.get_nodeattr("PE") - dtype = self.get_output_datatype() - elem_hls_type = dtype.get_hls_datatype_str() - npy_type = "float" - npy_out = "%s/output.npy" % code_gen_dir - shape = self.get_folded_output_shape() - shape_cpp_str = str(shape).replace("(", "{").replace(")", "}") - - # note: the innermost dim is not reversed for the output - self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'vectorstream2npy<%s, %s, %d>(strm, %s, "%s", false);' - % ( - elem_hls_type, - npy_type, - pe, - shape_cpp_str, - npy_out, - ) - ] - def blackboxfunction(self): input_elem_hls_type = self.get_input_datatype().get_hls_datatype_str() output_elem_hls_type = self.get_output_datatype().get_hls_datatype_str() simd = self.get_nodeattr("SIMD") pe = self.get_nodeattr("PE") - in_stream = "hls::stream> &in0_%s" % ( + in_stream = "hls::stream> &in0_V" % ( input_elem_hls_type, simd, - self.hls_sname(), ) - out_stream = "hls::stream> &out_%s" % ( + out_stream = "hls::stream> &out0_V" % ( output_elem_hls_type, pe, - self.hls_sname(), ) blackbox_hls = "void %s(%s, %s)" % (self.onnx_node.name, in_stream, out_stream) self.code_gen_dict["$BLACKBOXFUNCTION$"] = [blackbox_hls] def pragmas(self): - self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() - ] - self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() - ) + self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0_V"] + self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out0_V") self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE ap_ctrl_none port=return") self.code_gen_dict["$PRAGMAS$"].append('#include "params.h"') @@ -269,120 +188,7 @@ def pragmas(self): # ) def execute_node(self, context, graph): - mode = self.get_nodeattr("exec_mode") - node = self.onnx_node - - # TODO ensure codegen dir exists - if mode == "cppsim": - code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") - elif mode == "rtlsim": - code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - else: - raise Exception( - """Invalid value for attribute exec_mode! Is currently set to: {} - has to be set to one of the following value ("cppsim", "rtlsim")""".format( - mode - ) - ) - - # create a npy file fore each input of the node (in_ind is input index) - in_ind = 0 - for inputs in node.input: - # it is assumed that the first input of the node is the data input - # the second input are the weights - # the third input are the thresholds - if in_ind == 0: - assert ( - str(context[inputs].dtype) == "float32" - ), """Input datatype is - not float32 as expected.""" - expected_inp_shape = self.get_folded_input_shape() - reshaped_input = context[inputs].reshape(expected_inp_shape) - if self.get_input_datatype() == DataType["BIPOLAR"]: - # store bipolar activations as binary - reshaped_input = (reshaped_input + 1) / 2 - export_idt = DataType["BINARY"] - else: - export_idt = self.get_input_datatype() - # make copy before saving the array - reshaped_input = reshaped_input.copy() - np.save( - os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)), - reshaped_input, - ) - elif in_ind > 2: - raise Exception("Unexpected input found for MatrixVectorActivation") - in_ind += 1 - - if mode == "cppsim": - # execute the precompiled model - super().exec_precompiled_singlenode_model() - # load output npy file - super().npy_to_dynamic_output(context) - # reinterpret binary output as bipolar where needed - if self.get_output_datatype() == DataType["BIPOLAR"]: - out = context[node.output[0]] - out = 2 * out - 1 - context[node.output[0]] = out - assert ( - context[node.output[0]].shape == self.get_normal_output_shape() - ), "cppsim did not produce expected output shape" - elif mode == "rtlsim": - sim = self.get_rtlsim() - nbits = self.get_instream_width() - inp = npy_to_rtlsim_input("{}/input_0.npy".format(code_gen_dir), export_idt, nbits) - self.reset_rtlsim(sim) - self.toggle_clk(sim) - output = self.rtlsim(sim, inp) - odt = self.get_output_datatype() - target_bits = odt.bitwidth() - packed_bits = self.get_outstream_width() - out_npy_path = "{}/output.npy".format(code_gen_dir) - out_shape = self.get_folded_output_shape() - rtlsim_output_to_npy(output, out_npy_path, odt, out_shape, packed_bits, target_bits) - - # load and reshape output - output = np.load(out_npy_path) - oshape = self.get_normal_output_shape() - output = np.asarray([output], dtype=np.float32).reshape(*oshape) - context[node.output[0]] = output - else: - raise Exception( - """Invalid value for attribute exec_mode! Is currently set to: {} - has to be set to one of the following value ("cppsim", "rtlsim")""".format( - mode - ) - ) - - def code_generation_cppsim(self, model): - """Generates c++ code for simulation (cppsim).""" - node = self.onnx_node - path = self.get_nodeattr("code_gen_dir_cppsim") - self.code_gen_dict["$AP_INT_MAX_W$"] = [str(self.get_ap_int_max_w())] - self.generate_params(model, path) - self.global_includes() - self.defines("cppsim") - self.read_npy_data() - self.strm_decl() - self.pragmas() - self.docompute() - self.dataoutstrm() - self.save_as_npy() - self.timeout_value() - self.timeout_condition() - self.timeout_read_stream() - - template = templates.docompute_template_timeout - - for key in self.code_gen_dict: - # transform list into long string separated by '\n' - code_gen_line = "\n".join(self.code_gen_dict[key]) - template = template.replace(key, code_gen_line) - code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") - f = open(os.path.join(code_gen_dir, "execute_{}.cpp".format(node.op_type)), "w") - f.write(template) - f.close() - self.code_gen_dict.clear() + HLSBackend.execute_node(self, context, graph) def timeout_value(self): """Set timeout value for HLS functions defined for one clock cycle""" diff --git a/tests/fpgadataflow/test_fpgadataflow_deconv.py b/tests/fpgadataflow/test_fpgadataflow_deconv.py index 3f9282f8e9..ba7f43134b 100644 --- a/tests/fpgadataflow/test_fpgadataflow_deconv.py +++ b/tests/fpgadataflow/test_fpgadataflow_deconv.py @@ -146,6 +146,8 @@ def create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding, inputDataType=idt.name, weightDataType=wdt.name, outputDataType=odt.name, + cpp_interface="hls_vector", + hls_style="freerunning", ) node_list = [Deconv] From 7e68ea6a917eb0a8afbc3e69c52bc95b644f980a Mon Sep 17 00:00:00 2001 From: auphelia Date: Tue, 12 Aug 2025 16:27:15 +0100 Subject: [PATCH 15/16] [Tests] Bring back pixel padding test --- .../fpgadataflow/test_fpgadataflow_deconv.py | 104 +++++++++++++++++- 1 file changed, 103 insertions(+), 1 deletion(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_deconv.py b/tests/fpgadataflow/test_fpgadataflow_deconv.py index ba7f43134b..c545200016 100644 --- a/tests/fpgadataflow/test_fpgadataflow_deconv.py +++ b/tests/fpgadataflow/test_fpgadataflow_deconv.py @@ -40,11 +40,22 @@ import finn.core.onnx_exec as oxe from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim +from finn.transformation.fpgadataflow.convert_to_hw_layers import ( + InferConvInpGen, + InferQuantizedMatrixVectorActivation, +) from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.infer_pixel_padding_deconv import ( + InferPixelPaddingDeconv, +) +from finn.transformation.fpgadataflow.minimize_accumulator_width import ( + MinimizeAccumulatorWidth, +) from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim from finn.transformation.fpgadataflow.prepare_ip import PrepareIP from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode +from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers from finn.util.basic import pynq_part_map test_pynq_board = "Pynq-Z1" @@ -177,6 +188,97 @@ def create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding, return model +# input image dimension +@pytest.mark.parametrize("idim", [[8, 8], [10, 8]]) +# number of rows and number of cols to add +@pytest.mark.parametrize("stride", [[2, 2], [2, 3]]) +# number of channels +@pytest.mark.parametrize("ifm_ch", [2]) +# number of channels +@pytest.mark.parametrize("ofm_ch", [4]) +# Input parallelism +@pytest.mark.parametrize("simd", [1, 2]) +# PE +@pytest.mark.parametrize("pe", [1, 2]) +# kernel size +@pytest.mark.parametrize("k", [2]) +# padding +@pytest.mark.parametrize("padding", [0, 1]) +# exec mode +@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.fpgadataflow +@pytest.mark.slow +@pytest.mark.vivado +def test_fpgadataflow_deconv_pixel_pad( + idim, stride, ifm_ch, ofm_ch, simd, pe, k, padding, exec_mode +): + idt = wdt = DataType["INT4"] + wdt = idt + odt = DataType["INT32"] + idim_h, idim_w = idim + stride_h, stride_w = stride + + ref_model = set_up_reference_model(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding)[0] + + odim_h = (idim_h - 1) * stride_h - 2 * padding + (k - 1) + 1 + odim_w = (idim_w - 1) * stride_w - 2 * padding + (k - 1) + 1 + + input_tensor = gen_finn_dt_tensor(idt, [1, ifm_ch, idim_h, idim_w]) + input_dict = {"inp": input_tensor} + + y_expected = oxe.execute_onnx(ref_model, input_dict)["outp"] + + model = ref_model.transform(InferPixelPaddingDeconv()) + model = model.transform(InferConvInpGen()) + model = model.transform(InferQuantizedMatrixVectorActivation()) + model = model.transform(InferShapes()) + model = model.transform(GiveUniqueNodeNames()) + + y_produced = oxe.execute_onnx(model, input_dict)["outp"] + assert (y_produced == y_expected).all() + + model = model.transform(SpecializeLayers(test_fpga_part)) + model = model.transform(MinimizeAccumulatorWidth()) + + for n in model.graph.node: + if n.op_type.startswith("ConvolutionInputGenerator"): + convinputgen_node = getCustomOp(n) + convinputgen_node.set_nodeattr("SIMD", simd) + elif n.op_type.startswith("MVAU"): + mvau_node = getCustomOp(n) + mvau_node.set_nodeattr("PE", pe) + mvau_node.set_nodeattr("SIMD", simd) + + expected_oshape = (1, ofm_ch, odim_h, odim_w) + + # cppsim + if exec_mode == "cppsim": + model = model.transform(PrepareCppSim()) + model = model.transform(CompileCppSim()) + model = model.transform(SetExecMode("cppsim")) + + # rtlsim + else: + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP(test_fpga_part, target_clk_ns)) + model = model.transform(HLSSynthIP()) + model = model.transform(PrepareRTLSim()) + model = model.transform(SetExecMode("rtlsim")) + + y_produced = oxe.execute_onnx(model, input_dict)["outp"] + assert y_produced.shape == expected_oshape + assert (y_produced == y_expected).all() + + if exec_mode == "rtlsim": + node = model.get_nodes_by_op_type("FMPadding_Pixel_hls")[0] + inst = getCustomOp(node) + cycles_rtlsim = inst.get_nodeattr("cycles_rtlsim") + exp_cycles_dict = model.analysis(exp_cycles_per_layer) + exp_cycles = exp_cycles_dict[node.name] + assert np.isclose(exp_cycles, cycles_rtlsim, atol=10) + assert exp_cycles != 0 + + # input image dimension @pytest.mark.parametrize("idim", [[8, 8]]) # number of rows and number of cols to add @@ -198,7 +300,7 @@ def create_deconv_node(idt, wdt, odt, k, idim, ifm_ch, ofm_ch, stride, padding, @pytest.mark.fpgadataflow @pytest.mark.slow @pytest.mark.vivado -def test_fpgadataflow_deconv(idim, stride, ifm_ch, ofm_ch, simd, pe, k, padding, exec_mode): +def test_fpgadataflow_deconv_revd2(idim, stride, ifm_ch, ofm_ch, simd, pe, k, padding, exec_mode): idt = wdt = DataType["INT8"] wdt = idt odt = DataType["INT32"] From ee7d7115bb46d3403da433e649042fa1b390ad10 Mon Sep 17 00:00:00 2001 From: auphelia Date: Tue, 12 Aug 2025 16:52:40 +0100 Subject: [PATCH 16/16] [Util] Remove custom numpy to hls code conversion for deconv and use exisiting fct --- .../fpgadataflow/hls/deconvolution_hls.py | 6 ++- src/finn/util/data_packing.py | 51 ------------------- 2 files changed, 4 insertions(+), 53 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py index 39c01fa8dc..d0e90661e3 100644 --- a/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/deconvolution_hls.py @@ -31,7 +31,7 @@ from finn.custom_op.fpgadataflow.deconvolution import Deconvolution from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend -from finn.util.data_packing import numpy_to_hls_code1 +from finn.util.data_packing import numpy_to_hls_code class Deconvolution_hls(Deconvolution, HLSBackend): @@ -83,7 +83,9 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name): weight_tensor = self.get_hw_compatible_weight_tensor(weights) export_wdt = self.get_weight_datatype() if weight_file_mode == "hls_header": - weight_hls_code = numpy_to_hls_code1(weight_tensor, export_wdt, "weights", False, True) + weight_hls_code = numpy_to_hls_code(weight_tensor, export_wdt, "weights", False, True) + # remove framing {} + weight_hls_code = weight_hls_code[1:-2] + ";" # write weights into C++ header file as dictated by finn-hlslib f_weights = open(weight_file_name, "w") f_weights.write( diff --git a/src/finn/util/data_packing.py b/src/finn/util/data_packing.py index 08f801aba2..61773d29b4 100644 --- a/src/finn/util/data_packing.py +++ b/src/finn/util/data_packing.py @@ -285,57 +285,6 @@ def elem2str(x): return ret -def numpy_to_hls_code1(ndarray, dtype, hls_var_name, pack_innermost_dim=True, no_decl=False): - """Return C++ code representation of a numpy ndarray with FINN DataType - dtype, using hls_var_name as the resulting C++ variable name. If - pack_innermost_dim is specified, the innermost dimension of the ndarray - will be packed into a hex string using array2hexstring. If no_decl is - set to True, no variable name and type will be generated as part of the - emitted string. - """ - hls_dtype = dtype.get_hls_datatype_str() - if type(ndarray) != np.ndarray or ndarray.dtype != np.float32: - # try to convert to a float numpy array (container dtype is float) - ndarray = np.asarray(ndarray, dtype=np.float32) - if pack_innermost_dim: - idimlen = ndarray.shape[-1] - idimbits = idimlen * dtype.bitwidth() - idimbits = roundup_to_integer_multiple(idimbits, 4) - ndarray = pack_innermost_dim_as_hex_string(ndarray, dtype, idimbits) - hls_dtype = "ap_uint<%d>" % idimbits - ndims = ndarray.ndim - # add type string and variable name - # e.g. "const ap_uint<64>" "weightMem0" - ret = "%s %s" % (hls_dtype, hls_var_name) - # add dimensions - for d in range(ndims): - ret += "[%d]" % ndarray.shape[d] - orig_printops = np.get_printoptions() - np.set_printoptions(threshold=sys.maxsize) - - # define a function to convert a single element into a C++ init string - # a single element can be a hex string if we are using packing - def elem2str(x): - if type(x) == str or type(x) == np.str_: - return '{{"%s",},}' % (x) - elif type(x) == np.float32: - if dtype.is_integer(): - return str(int(x)) - else: - return str(x) - else: - raise Exception("Unsupported type for numpy_to_hls_code") - - strarr = np.array2string(ndarray.flatten(), separator=", ", formatter={"all": elem2str}) - np.set_printoptions(**orig_printops) - strarr = strarr.replace("[", "{").replace("]", ",}") - if no_decl: - ret = strarr + ";" - else: - ret = ret + " = \n" + strarr + ";" - return ret - - def npy_to_rtlsim_input(input_file, input_dtype, pad_to_nbits, reverse_inner=True): """Convert the multidimensional NumPy array of integers (stored as floats) from input_file into a flattened sequence of Python arbitrary-precision