class SparseGraphOptimizer(OptimizerPass):
    """Name the hash arrays of a sparse sub-graph and sparsify trailing reshapes.

    The pass fires on a ``SparseInputReduce`` node that has no ``hash_out_name``
    yet. A single invocation then walks every node of ``model.graph`` in
    iteration order (producers are expected to appear before their consumers) and

    * gives each ``SparseInputReduce`` / ``SparsePooling2D`` a fresh hash variable name,
    * forwards the current hash name through ``SparseConv2D``, ``FixedPointQuantizer``,
      ``SparseActivation`` and ``SparseFlatten`` nodes,
    * tracks the dense ``(height, width)`` grid each sparse tensor refers to, and
    * replaces a ``Reshape`` that consumes a sparse tensor with a ``SparseFlatten`` node.
    """

    def match(self, node):
        """Return True for a ``SparseInputReduce`` whose hash variable is still unnamed."""
        return isinstance(node, SparseInputReduce) and node.get_attr('hash_out_name', None) is None

    @staticmethod
    def _candidate_sources(n):
        """Return the input names to consult, preferred first.

        When a node has a second input it is tried before the first one.
        """
        if len(n.inputs) > 1:
            return (n.inputs[1], n.inputs[0])
        return (n.inputs[0],)

    @staticmethod
    def _first_known(table, keys, default=None):
        """Return the first non-None ``table`` entry among ``keys``, else ``default``.

        Entries that are present but ``None`` are skipped: the quantizer and
        activation branches of ``transform`` may store ``None`` for a tensor
        whose producer was never seen, and such a placeholder must not hide a
        usable entry (or the caller's default).
        """
        for key in keys:
            value = table.get(key)
            if value is not None:
                return value
        return default

    def transform(self, model, node):
        """Annotate all sparse nodes of ``model`` and convert eligible ``Reshape`` nodes.

        Args:
            model: graph container; ``model.graph`` maps node names to nodes.
            node: the ``SparseInputReduce`` that triggered the pass (every
                sparse node in the graph is processed, not only this one).

        Returns:
            True if at least one ``Reshape`` was replaced by a ``SparseFlatten``,
            False otherwise.
        """
        hash_map = {}  # node name -> name of the hash array valid at that node's output
        spatial = {}  # node name -> (height, width) of the dense grid behind that output
        changed = False

        # Iterate over a snapshot: replace_node() below mutates model.graph.
        for name, n in list(model.graph.items()):
            if isinstance(n, SparseInputReduce):
                h_var = f'sparse_hash_{name}'
                n.set_attr('hash_out_name', h_var)
                hash_map[name] = h_var
                spatial[name] = (n.get_attr('in_height'), n.get_attr('in_width'))

            elif isinstance(n, SparseConv2D):
                # The hash name is passed through unchanged (same name in and out).
                sources = self._candidate_sources(n)
                h_var = self._first_known(hash_map, sources)
                n.set_attr('hash_in_name', h_var)
                n.set_attr('hash_out_name', h_var)
                hash_map[name] = h_var
                spatial[name] = self._first_known(spatial, sources)

            elif isinstance(n, FixedPointQuantizer):
                # Only quantizers that sit on a sparse tensor are tracked.
                src = n.inputs[0]
                if src in hash_map:
                    hash_map[name] = hash_map[src]
                    spatial[name] = spatial.get(src)

            elif isinstance(n, SparseActivation):
                src = n.inputs[0]
                hash_map[name] = hash_map.get(src)
                spatial[name] = spatial.get(src)

            elif isinstance(n, SparsePooling2D):
                # Pooling writes a new hash array, so it gets its own variable name.
                sources = self._candidate_sources(n)
                h_out = f'sparse_hash_{name}'
                n.set_attr('hash_in_name', self._first_known(hash_map, sources))
                n.set_attr('hash_out_name', h_out)
                hash_map[name] = h_out
                ps = n.get_attr('pool_size')
                # A None placeholder upstream falls back to (0, 0) instead of
                # failing on tuple unpacking.
                prev_h, prev_w = self._first_known(spatial, sources, (0, 0))
                spatial[name] = (prev_h // ps, prev_w // ps)

            elif isinstance(n, SparseFlatten):
                src = n.inputs[0]
                h_var = hash_map.get(src)
                if h_var is not None:
                    n.set_attr('hash_in_name', h_var)
                hash_map[name] = h_var
                spatial[name] = spatial.get(src, (1, 1))

            elif isinstance(n, Reshape):
                src = n.inputs[0]
                if src not in hash_map:
                    continue
                src_node = model.graph[src]
                n_sparse = src_node.get_attr('n_sparse', None)
                if n_sparse is None:
                    # Producer carries no sparse metadata (e.g. a quantizer); leave the Reshape alone.
                    continue
                # The flatten consumes the producer's OUTPUT channels. A
                # SparseConv2D emits n_sparse * n_filt values, so n_filt must
                # win over n_chan (its input channel count) when both exist.
                n_chan = src_node.get_attr('n_filt', None)
                if n_chan is None:
                    n_chan = src_node.get_attr('n_chan', None)
                sp = spatial.get(src) or (1, 1)

                attrs = {
                    'n_sparse': n_sparse,
                    'n_chan': n_chan,
                    'out_height': sp[0],
                    'out_width': sp[1],
                    'hash_in_name': hash_map[src],
                }
                new_node = model.make_node('SparseFlatten', name, attrs, n.inputs.copy(), outputs=n.outputs.copy())
                model.replace_node(n, new_node)
                changed = True

        return changed
{n_sparse}; + static const unsigned n_chan = {n_chan}; +}};\n""" + +sparse_pooling2d_config = """struct config{index} {{ + static const unsigned n_sparse = {n_sparse}; + static const unsigned n_chan = {n_chan}; + static const unsigned pool_size = {pool_size}; + typedef {accum_t.name} accum_t; +}};\n""" + +sparse_flatten_config = """struct config{index} {{ + static const unsigned n_sparse = {n_sparse}; + static const unsigned n_chan = {n_chan}; + static const unsigned out_height = {out_height}; + static const unsigned out_width = {out_width}; +}};\n""" + + +class SparseInputReduceConfigTemplate(LayerConfigTemplate): + def __init__(self): + super().__init__(SparseInputReduce) + self.template = sparse_input_reduce_config + + def format(self, node): + return self.template.format(**self._default_config_params(node)) + + +class SparseConv2DConfigTemplate(LayerConfigTemplate): + def __init__(self): + super().__init__(SparseConv2D) + self.template = sparse_conv2d_config + + def format(self, node): + return self.template.format(**self._default_config_params(node)) + + +class SparseActivationConfigTemplate(LayerConfigTemplate): + def __init__(self): + super().__init__(SparseActivation) + self.template = sparse_activation_config + + def format(self, node): + return self.template.format(**self._default_config_params(node)) + + +class SparsePooling2DConfigTemplate(LayerConfigTemplate): + def __init__(self): + super().__init__(SparsePooling2D) + self.template = sparse_pooling2d_config + + def format(self, node): + return self.template.format(**self._default_config_params(node)) + + +class SparseFlattenConfigTemplate(LayerConfigTemplate): + def __init__(self): + super().__init__(SparseFlatten) + self.template = sparse_flatten_config + + def format(self, node): + return self.template.format(**self._default_config_params(node)) + + +# Function-call templates + +sparse_input_reduce_function = ( + '{input_t} threshold_{index} = {threshold};\n' + 'ap_uint<{hash_bits}> 
def _get_hash_bits(node):
    """Return the ``hash_bits`` attribute of ``node`` or of its nearest ancestor.

    Starting at ``node``, the search follows the first input of each node
    (looked up by name in ``node.model.graph``) until a node exposing a
    non-None ``hash_bits`` attribute is found. If the chain ends first, either
    at a node without inputs or at an input name missing from the graph, the
    fallback value 10 is returned.
    """
    current = node
    while current is not None:
        bits = current.get_attr('hash_bits', None)
        if bits is not None:
            return bits
        if len(current.inputs) == 0:
            # Reached a source node without finding the attribute.
            break
        # graph.get() yields None for an unknown name, which ends the loop.
        current = current.model.graph.get(current.inputs[0])
    return 10
class SparseConv2DFunctionTemplate(FunctionCallTemplate):
    """Render the ``sparse_conv`` call emitted for a ``SparseConv2D`` node."""

    def __init__(self):
        super().__init__(SparseConv2D, include_header=sparsepixels_include)
        self.template = sparse_conv2d_function

    def format(self, node):
        """Fill the ``sparse_conv`` call template with the attributes of ``node``."""
        params = self._default_function_params(node)

        # Plain integer attributes are copied over under their own names.
        for key in ('n_sparse', 'n_chan', 'n_filt', 'kernel_size'):
            params[key] = node.get_attr(key)

        # The hash element width is inherited from an upstream node.
        params['hash_bits'] = _get_hash_bits(node)
        params['hash_in'] = node.get_attr('hash_in_name')

        weight = node.get_weights('weight')
        bias = node.get_weights('bias')
        params['w'] = weight.name
        params['b'] = bias.name
        params['weight_t'] = weight.type.name
        params['bias_t'] = bias.type.name
        params['accum_t_name'] = node.get_attr('accum_t').name

        return self.template.format(**params)
class SparseFixInputPrecision(OptimizerPass):
    """Correct the precision of an ``Input`` that feeds a ``SparseInputReduce``.

    Per the original author's notes, the standard FixInputPrecision pass cannot
    see a ``FixedPointQuantizer`` located behind a sparse layer
    (Input -> SparseInputReduce -> FPQ) and therefore falls back to a minimal
    type. This pass derives the Input type from that quantizer's output type
    and then re-registers the precision of the ``SparseInputReduce`` node.
    """

    @staticmethod
    def _first_consumer(model, producer_name, layer_type):
        """Return the first ``layer_type`` node listing ``producer_name`` among its inputs, or None."""
        return next(
            (
                candidate
                for candidate in model.graph.values()
                if isinstance(candidate, layer_type) and producer_name in candidate.inputs
            ),
            None,
        )

    def match(self, node):
        """Return True for an ``Input`` node consumed by at least one ``SparseInputReduce``."""
        if not isinstance(node, Input):
            return False
        return self._first_consumer(node.model, node.name, SparseInputReduce) is not None

    def transform(self, model, node):
        """Overwrite the output type of ``node`` and refresh the downstream sparse reducer.

        Always returns False, including when the precision was updated.
        """
        from hls4ml.model.optimizer.passes.bit_exact import (
            produce_kif,
            register_precision,
            to_hls4ml_fixed,
        )

        reducer = self._first_consumer(model, node.name, SparseInputReduce)
        if reducer is None:
            return False

        quantizer = self._first_consumer(model, reducer.name, FixedPointQuantizer)
        if quantizer is None:
            return False

        # The quantizer's registered output type is read directly. The original
        # author notes that recomputing its kif here would clip against the
        # Input precision that this pass is about to replace.
        q_precision = quantizer.get_output_variable().type.precision
        sign_bits = 1 if q_precision.signed else 0
        int_bits = q_precision.integer - sign_bits
        frac_bits = q_precision.width - q_precision.integer

        # NOTE(review): one extra fractional bit is requested on top of the
        # quantizer's own; the reason is not visible here - confirm.
        input_type = to_hls4ml_fixed(sign_bits, int_bits, frac_bits + 1, f'{node.name}_t')
        saturating = hasattr(quantizer, 'SAT') and quantizer.SAT in ('SAT', 'SAT_SYM')
        input_type.precision.saturation_mode = 'SAT' if saturating else 'WRAP'

        node.get_output_variable().type = input_type
        node.model.config.layer_name_precision[node.name] = str(input_type)
        node.attributes['trusted'] = True

        # Recompute and register the reducer's precision against the new Input type.
        produce_kif(reducer, force_reset=True)
        register_precision(reducer)
        for scratch_key in ('_produce_kif', '_request_kif'):
            if scratch_key in reducer.attributes:
                del reducer.attributes[scratch_key]

        return False
def _mark_sparse_output(tensor_name: str, n_sparse: int, n_chan: int, height: int, width: int):
    """Register ``tensor_name`` as the output of a sparse layer.

    The entry is stored under ``_sparse_context['sparse_output_tensors']``
    (created on first use) and overwrites any previous entry for the same
    tensor name. It records the sparse pixel count, the channel count and the
    dense height/width, which a following Flatten needs to be converted.
    """
    if 'sparse_output_tensors' not in _sparse_context:
        _sparse_context['sparse_output_tensors'] = {}
    registry = _sparse_context['sparse_output_tensors']
    registry[tensor_name] = dict(
        n_sparse=n_sparse,
        n_chan=n_chan,
        out_height=height,
        out_width=width,
    )
def post_process_sparse_layer_list(layer_list: list[dict[str, Any]]) -> None:
    """Rewrite, in place, ``Reshape`` configs that consume a sparse tensor into ``SparseFlatten``.

    A config is rewritten when its ``class_name`` is ``'Reshape'`` and its first
    input tensor name was registered in ``_sparse_context['sparse_output_tensors']``.
    The rewritten config receives the registered ``n_sparse``, ``n_chan``,
    ``out_height`` and ``out_width`` and loses its ``target_shape`` entry.
    Nothing happens when no sparse tensor has been registered.
    """
    tracked = _sparse_context.get('sparse_output_tensors', {})
    if not tracked:
        return

    for entry in layer_list:
        if entry.get('class_name') != 'Reshape':
            continue
        sources = entry.get('input_keras_tensor_names', [])
        if not sources or sources[0] not in tracked:
            continue

        info = tracked[sources[0]]
        entry['class_name'] = 'SparseFlatten'
        for key in ('n_sparse', 'n_chan', 'out_height', 'out_width'):
            entry[key] = info[key]
        entry.pop('target_shape', None)
class QConv2DSparseHandler(KerasV3LayerHandler):
    """Translate ``sparsepixels.layers.QConv2DSparse`` into hls4ml layer configs.

    ``handle`` returns a tuple holding, in order: an optional
    ``FixedPointQuantizer`` config for the wrapped conv's input quantizer, the
    ``SparseConv2D`` config, and an optional ``SparseActivation`` config when
    the layer's activation is neither ``None`` nor linear.
    """

    handles = ('sparsepixels.layers.QConv2DSparse',)

    def handle(
        self,
        layer: 'keras.Layer',
        in_tensors: Sequence['KerasTensor'],
        out_tensors: Sequence['KerasTensor'],
    ):
        import keras
        from keras import ops

        inner = layer.conv
        n_chan = int(inner.kernel.shape[2])
        n_filt = int(inner.filters)
        kernel_size = int(inner.kernel_size[0])
        # Falls back to 0 when no InputReduce layer populated the shared context.
        n_sparse = _sparse_context.get('n_sparse', 0)

        # Use the quantized kernel when the wrapped conv exposes one.
        kernel_tensor = inner.qkernel if hasattr(inner, 'qkernel') else inner.kernel
        weight_data = ops.convert_to_numpy(kernel_tensor)

        bias_data = None
        if layer._use_bias and hasattr(layer, 'sparse_bias'):
            bias_tensor = layer._bq(layer.sparse_bias) if hasattr(layer, '_bq') else layer.sparse_bias
            bias_data = ops.convert_to_numpy(bias_tensor)

        layer_name = layer.name
        conv_inputs = [t.name for t in in_tensors]
        final_outputs = [t.name for t in out_tensors]

        iq_conf = None
        if hasattr(inner, '_iq') and hasattr(inner, '_enable_iq') and inner._enable_iq:
            first_input = in_tensors[0].name
            iq_conf = _extract_sparse_iq_config(inner, first_input, n_sparse, n_chan)
            # NOTE(review): the whole input list is replaced by the single
            # quantized tensor, so any further input tensors are dropped here
            # - confirm this is intended.
            conv_inputs = [f'{first_input}_q']

        conv_config: dict[str, Any] = {
            'class_name': 'SparseConv2D',
            'name': layer_name,
            'n_sparse': n_sparse,
            'n_chan': n_chan,
            'n_filt': n_filt,
            'kernel_size': kernel_size,
            'weight_data': weight_data,
            'bias_data': bias_data,
            'input_keras_tensor_names': conv_inputs,
            'output_keras_tensor_names': final_outputs,
        }

        activation = layer._activation
        grid_h, grid_w = _sparse_context.get('spatial', (1, 1))

        emitted: list[dict[str, Any]] = []
        if iq_conf is not None:
            emitted.append(iq_conf)
        emitted.append(conv_config)

        if activation not in (None, keras.activations.linear):
            act_name = activation.__name__
            # Route the conv into an intermediate tensor so the activation
            # layer becomes the producer of the original output tensors.
            bridge = f'{out_tensors[0].name}_sparse_act'
            conv_config['output_keras_tensor_names'] = [bridge]
            emitted.append(
                {
                    'class_name': 'SparseActivation',
                    'name': f'{layer_name}_{act_name}',
                    'activation': act_name,
                    'n_sparse': n_sparse,
                    'n_chan': n_filt,
                    'input_keras_tensor_names': [bridge],
                    'output_keras_tensor_names': final_outputs,
                }
            )

        # With or without activation, the final outputs are sparse tensors with n_filt channels.
        for tensor_name in final_outputs:
            _mark_sparse_output(tensor_name, n_sparse, n_filt, grid_h, grid_w)

        return tuple(emitted)
= in_tensors[0].shape[1:] # type: ignore + n_chan = int(feat_shape[-1]) + n_sparse = _sparse_context.get('n_sparse', 0) + + prev_h, prev_w = _sparse_context.get('spatial', (1, 1)) + new_h, new_w = prev_h // pool_size, prev_w // pool_size + _sparse_context['spatial'] = (new_h, new_w) + + out_tensor_names = [t.name for t in out_tensors] + for t_name in out_tensor_names: + _mark_sparse_output(t_name, n_sparse, n_chan, new_h, new_w) + + return { + 'class_name': 'SparsePooling2D', + 'n_sparse': n_sparse, + 'n_chan': n_chan, + 'pool_size': pool_size, + } diff --git a/hls4ml/converters/keras_v3_to_hls.py b/hls4ml/converters/keras_v3_to_hls.py index 359bc391d6..697a6dfdce 100644 --- a/hls4ml/converters/keras_v3_to_hls.py +++ b/hls4ml/converters/keras_v3_to_hls.py @@ -352,6 +352,14 @@ def parse_keras_v3_model(model: 'keras.Model', allow_da_fallback=True, allow_v2_ # If no layer was added in the loop, then there is a circular dependency raise ValueError('Circular dependency detected') + # Post-process: convert Flatten following sparse layers to SparseFlatten + try: + from hls4ml.converters.keras_v3.sparsepixels import post_process_sparse_layer_list + + post_process_sparse_layer_list(layer_list) + except ImportError: + pass + # Mark inputs[inp layer name] for ModelGraph to parse from i/o keras tensor names provides: dict[str, str] = {} # tensor_name -> src_layer_name for conf in layer_list: diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py index 8bd8cd8a11..23b58beae8 100644 --- a/hls4ml/model/layers.py +++ b/hls4ml/model/layers.py @@ -1782,6 +1782,92 @@ def initialize(self): self.add_output_variable(shape) +class SparseInputReduce(Layer): + _expected_attributes = [ + Attribute('in_height'), + Attribute('in_width'), + Attribute('n_chan'), + Attribute('n_sparse'), + Attribute('threshold', value_type=float), + Attribute('hash_bits', value_type=int, default=10), + ] + + def initialize(self): + shape = [self.attributes['n_sparse'] * self.attributes['n_chan']] + 
self.add_output_variable(shape) + + +class SparseConv2D(Layer): + _expected_attributes = [ + Attribute('n_sparse'), + Attribute('n_chan'), + Attribute('n_filt'), + Attribute('kernel_size'), + WeightAttribute('weight'), + WeightAttribute('bias'), + TypeAttribute('weight'), + TypeAttribute('bias'), + TypeAttribute('accum'), + ] + + def initialize(self): + shape = [self.attributes['n_sparse'] * self.attributes['n_filt']] + self.add_output_variable(shape) + self.add_weights(quantizer=self.get_attr('weight_quantizer')) + self.add_bias(quantizer=self.get_attr('bias_quantizer')) + + def add_bias(self, quantizer=None): + data = self.get_attr('bias_data', None) + precision = None + type_name = None + if data is None: + data = np.zeros(self.attributes['n_filt']) + precision = IntegerPrecisionType(width=1, signed=False) + type_name = 'bias{index}_t' + quantizer = None + self.add_weights_variable( + name='bias', var_name='b{index}', type_name=type_name, precision=precision, data=data, quantizer=quantizer + ) + + +class SparseActivation(Layer): + _expected_attributes = [ + Attribute('n_sparse'), + Attribute('n_chan'), + Attribute('activation', value_type=str), + ] + + def initialize(self): + shape = [self.attributes['n_sparse'] * self.attributes['n_chan']] + self.add_output_variable(shape) + + +class SparsePooling2D(Layer): + _expected_attributes = [ + Attribute('n_sparse'), + Attribute('n_chan'), + Attribute('pool_size'), + TypeAttribute('accum'), + ] + + def initialize(self): + shape = [self.attributes['n_sparse'] * self.attributes['n_chan']] + self.add_output_variable(shape) + + +class SparseFlatten(Layer): + _expected_attributes = [ + Attribute('n_sparse'), + Attribute('n_chan'), + Attribute('out_height'), + Attribute('out_width'), + ] + + def initialize(self): + shape = [self.attributes['out_height'] * self.attributes['out_width'] * self.attributes['n_chan']] + self.add_output_variable(shape) + + layer_map = { 'Input': Input, 'InputLayer': Input, @@ -1860,6 +1946,12 @@ def 
@_request_kif.register
def _(layer: SparsePooling2D):
    """Request precision for the inputs of a ``SparsePooling2D`` layer.

    Input 0 carries the features and is requested at the maximum precision for
    its shape. Input 1, when present, is the hash side-channel: it gets
    ``k = 0`` and ``i = f = -127`` so that, per the original author's note, the
    request never widens the precision of the node producing the hash when
    requests from several consumers are combined.

    Returns:
        A tuple with one ``(k, i, f)`` entry per layer input (one or two entries).
    """
    # Query the shapes once instead of once per use.
    input_shapes = get_input_shapes(layer)
    feat_kif = _maximum_kif_at_shape(input_shapes[0])
    if len(input_shapes) <= 1:
        return (feat_kif,)

    hash_shape = input_shapes[1]
    k2 = np.zeros(hash_shape, dtype=np.int16)
    i2 = np.full(hash_shape, -127, dtype=np.int16)
    f2 = np.full(hash_shape, -127, dtype=np.int16)
    return (feat_kif, (k2, i2, f2))
@_produce_kif.register
def _(layer: SparsePooling2D):
    """Derive the output ``(k, i, f)`` arrays of a sparse average-pooling layer.

    The first ``n_chan`` entries of the input arrays are taken as the
    per-channel precision, the fractional part is widened by
    ``ceil(log2(pool_size ** 2))`` bits to cover the division performed by the
    average, and the result is repeated ``n_sparse`` times as ``int16`` arrays.
    """
    attrs = layer.attributes
    pool_size = attrs['pool_size']
    n_chan = attrs['n_chan']
    n_sparse = attrs['n_sparse']

    k_in, i_in, f_in = get_input_kifs(layer)[0]
    # Averaging over pool_size^2 values adds this many fractional bits.
    extra_f = int(np.ceil(np.log2(pool_size * pool_size)))

    # presumably every sparse pixel shares the precision of the first one - verify against the producer.
    per_channel = (k_in[:n_chan], i_in[:n_chan], f_in[:n_chan] + extra_f)
    k_out, i_out, f_out = (np.tile(arr, n_sparse).astype(np.int16) for arr in per_channel)
    return (k_out, i_out, f_out)
Concatenate)): layers.append(_node) get_output_layers_and_quantizers(_node, layers, quantizers) + elif isinstance(_node, (SparseInputReduce, SparseConv2D, SparseActivation, SparsePooling2D, SparseFlatten)): + layers.append(_node) else: raise ValueError(f'Layer {node.name} ({node.class_name}) unexpected input layer chain.') return layers, quantizers diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_sparsepixels.h b/hls4ml/templates/vivado/nnet_utils/nnet_sparsepixels.h new file mode 100644 index 0000000000..41e5953f75 --- /dev/null +++ b/hls4ml/templates/vivado/nnet_utils/nnet_sparsepixels.h @@ -0,0 +1,254 @@ +#ifndef NNET_SPARSEPIXELS_H_ +#define NNET_SPARSEPIXELS_H_ + +#include "ap_fixed.h" +#include "ap_int.h" + +constexpr int _sp_floorlog2(int x) { return (x < 2) ? 0 : 1 + _sp_floorlog2(x / 2); } +constexpr int _sp_pow2(int x) { return x == 0 ? 1 : 2 * _sp_pow2(x - 1); } +// ceil(log2(x)): bits needed to encode values 0..x-1 +constexpr int _sp_ceillog2(int x) { return (x <= 1) ? 1 : _sp_floorlog2(x - 1) + 1; } + +template struct value_idx_pair { + T value; + ap_uint index; +}; + +template class Op_active { + public: + T operator()(T a, T b, t threshold) { + if (a.value > threshold) + return a; + else if (b.value > threshold) + return b; + else { + T none; + none.value = 0; + none.index = 0; + return none; + } + } +}; + +template T find_active(T *x, Op op, t threshold) { + #pragma HLS INLINE + static constexpr int leftN = _sp_pow2(_sp_floorlog2(N - 1)) > 0 ? _sp_pow2(_sp_floorlog2(N - 1)) : 0; + static constexpr int rightN = N - leftN > 0 ? 
N - leftN : 0; + + if (N == 1) { + return x[0]; + } + if (N == 2) { + return op(x[0], x[1], threshold); + } + return op(find_active(x, op, threshold), find_active(x + leftN, op, threshold), + threshold); +} + +template +void sparse_input_reduce(data_T input_arr[N_h * N_w * N_c], data_T threshold, res_T sparse_arr_feat[N_sparse * N_c], + hash_T sparse_arr_hash[N_sparse * 2]) { + + // Flat pixel index ranges over 0..N_h*N_w-1 -> auto-sized to minimum bits + static constexpr int IDX_BITS = _sp_ceillog2(N_h * N_w); + typedef value_idx_pair pair_t; + + pair_t pair_arr[N_h * N_w]; + int j_h_arr[N_h * N_w]; + int j_w_arr[N_h * N_w]; + #pragma HLS ARRAY_PARTITION variable = j_h_arr type = complete dim = 0 + #pragma HLS ARRAY_PARTITION variable = j_w_arr type = complete dim = 0 + #pragma HLS ARRAY_PARTITION variable = pair_arr type = complete dim = 0 + +DataPrepareLoop: + for (int j = 0; j < N_h * N_w; j++) { + #pragma HLS UNROLL + pair_arr[j].value = input_arr[N_c * j]; + pair_arr[j].index = j; + + int remainder = j % (N_h * N_w); + int j_h = remainder / N_w + 1; + int j_w = remainder % N_w + 1; + + j_h_arr[j] = j_h; + j_w_arr[j] = j_w; + } + + Op_active op_active; +MaxPixelsLoop: + for (int i = 0; i < N_sparse; i++) { + #pragma HLS PIPELINE + pair_t pair = find_active, data_T>(pair_arr, op_active, threshold); + sparse_arr_feat[N_c * i] = (res_T)pair.value; + for (int j = 1; j < N_c; j++) { + #pragma HLS UNROLL + sparse_arr_feat[N_c * i + j] = (res_T)input_arr[N_c * pair.index + j]; + } + + sparse_arr_hash[2 * i] = j_h_arr[pair.index]; + sparse_arr_hash[2 * i + 1] = j_w_arr[pair.index]; + + pair_arr[pair.index].value = 0; + } +} + +template +accum_T mult_for_sparse_conv_kernel(int offset_h, int offset_w, data_T sparse_arr_feat_in[n_chan * N_sparse], + w_T filt_w[ker_size * ker_size * n_chan * n_filt], int i_filt, int i_pixel_in) { + #pragma HLS INLINE + constexpr int R = (ker_size - 1) / 2; + if ((unsigned)(offset_h + R) >= ker_size || (unsigned)(offset_w + R) >= ker_size) 
{ + return (accum_T)0; + } + ap_uint<4> row = R - offset_h; + ap_uint<4> col = R - offset_w; + ap_uint<7> pos = row * ker_size + col; + + accum_T acc = 0; +MultLoopPerFilter: + for (int i_chan = 0; i_chan < n_chan; i_chan++) { + #pragma HLS UNROLL + int w_idx = n_filt * n_chan * pos + n_filt * i_chan + i_filt; + acc += filt_w[w_idx] * sparse_arr_feat_in[n_chan * i_pixel_in + i_chan]; + } + return acc; +} + +template +void sparse_conv(data_T sparse_arr_feat_in[N_sparse * n_chan], res_T sparse_arr_feat_out[N_sparse * n_filt], + hash_T sparse_arr_hash[N_sparse * 2], w_T w[ker_size * ker_size * n_chan * n_filt], b_T b[n_filt]) { + +OutputPixelLoop: + for (int i_pixel_out = 0; i_pixel_out < N_sparse; i_pixel_out++) { + #pragma HLS UNROLL + + bool nonzero = false; + for (int i_chan = 0; i_chan < n_chan; i_chan++) { + #pragma HLS UNROLL + nonzero |= (sparse_arr_feat_in[i_pixel_out * n_chan + i_chan] != (data_T)0); + } + + OutputFilterLoop: + for (int i_filt = 0; i_filt < n_filt; i_filt++) { + #pragma HLS UNROLL + accum_T acc = 0; + + InputPixelLoop: + for (int i_pixel_in = 0; i_pixel_in < N_sparse; i_pixel_in++) { + #pragma HLS UNROLL + int offset_h = sparse_arr_hash[2 * i_pixel_out] - sparse_arr_hash[2 * i_pixel_in]; + int offset_w = sparse_arr_hash[2 * i_pixel_out + 1] - sparse_arr_hash[2 * i_pixel_in + 1]; + + acc += mult_for_sparse_conv_kernel( + offset_h, offset_w, sparse_arr_feat_in, w, i_filt, i_pixel_in); + } + + if (acc != 0) { + acc += b[i_filt]; + } + if (nonzero == false) { + acc = 0; + } + sparse_arr_feat_out[n_filt * i_pixel_out + i_filt] = (res_T)acc; + } + } +} + +template +void sparse_relu(data_T sparse_arr_feat_in[N_sparse * n_chan], res_T sparse_arr_feat_out[N_sparse * n_chan]) { + #pragma HLS PIPELINE + data_T data; + for (int i = 0; i < N_sparse * n_chan; i++) { + data = sparse_arr_feat_in[i]; + if (data > 0) { + sparse_arr_feat_out[i] = data; + } else { + sparse_arr_feat_out[i] = 0; + } + } +} + +template +void sparse_pooling_avg(data_T 
sparse_arr_feat_in[N_sparse * n_chan], res_T sparse_arr_feat_out[N_sparse * n_chan], + hash_T sparse_arr_hash_in[N_sparse * 2], hash_T sparse_arr_hash_out[N_sparse * 2]) { + + constexpr double _pool_size_recip_d = 1.0 / double(pool_size); + const ap_fixed<10, 0> pool_size_recip = _pool_size_recip_d; + + int hash_tmp[N_sparse * 2]; +#pragma HLS ARRAY_PARTITION variable = hash_tmp type = complete dim = 0 +ComputePooledLoc: + for (int i = 0; i < N_sparse; i++) { + #pragma HLS UNROLL + hash_tmp[2 * i] = (sparse_arr_hash_in[2 * i] - 1) / pool_size + 1; + hash_tmp[2 * i + 1] = (sparse_arr_hash_in[2 * i + 1] - 1) / pool_size + 1; + } + + data_T sparse_arr_feat_in_copy[N_sparse * n_chan]; + #pragma HLS ARRAY_PARTITION variable = sparse_arr_feat_in_copy type = complete dim = 0 + for (int i = 0; i < N_sparse * n_chan; i++) { + #pragma HLS UNROLL + sparse_arr_feat_in_copy[i] = sparse_arr_feat_in[i]; + } + +HashOutLoop: + for (int i_pixel = 0; i_pixel < N_sparse; i_pixel++) { + #pragma HLS UNROLL + int h_out = hash_tmp[2 * i_pixel]; + int w_out = hash_tmp[2 * i_pixel + 1]; + + ChannelLoop: + for (int i_chan = 0; i_chan < n_chan; i_chan++) { + #pragma HLS UNROLL + accum_T acc = 0; + + HashInLoop: + for (int j_pixel = 0; j_pixel < N_sparse; j_pixel++) { + #pragma HLS UNROLL + int h_in = hash_tmp[2 * j_pixel]; + int w_in = hash_tmp[2 * j_pixel + 1]; + + data_T data = sparse_arr_feat_in_copy[n_chan * j_pixel + i_chan]; + if ((h_out == h_in) && (w_out == w_in)) { + acc += data; + sparse_arr_feat_in_copy[n_chan * j_pixel + i_chan] = 0; + } + } + sparse_arr_feat_out[n_chan * i_pixel + i_chan] = (res_T)(acc * pool_size_recip * pool_size_recip); + } + sparse_arr_hash_out[2 * i_pixel] = h_out; + sparse_arr_hash_out[2 * i_pixel + 1] = w_out; + } +} + +template +void sparse_flatten(data_T sparse_arr_feat[N_sparse * n_chan], hash_T sparse_arr_hash[N_sparse * 2], + res_T flat_arr[n_height * n_width * n_chan]) { + +InitFlatArr: + for (int i = 0; i < n_height * n_width * n_chan; i++) { + 
#pragma HLS UNROLL + flat_arr[i] = 0; + } + +FillFlatArr: + for (int i = 0; i < N_sparse; i++) { + #pragma HLS UNROLL factor = 4 + int i_h = sparse_arr_hash[2 * i]; + int i_w = sparse_arr_hash[2 * i + 1]; + int pixel_idx = (i_h - 1) * n_width + (i_w - 1); + + ChannelLoop: + for (int i_chan = 0; i_chan < n_chan; i_chan++) { + #pragma HLS UNROLL + data_T data = sparse_arr_feat[n_chan * i + i_chan]; + + if (data != 0) { + flat_arr[n_chan * pixel_idx + i_chan] = (res_T)data; + } + } + } +} + +#endif // NNET_SPARSEPIXELS_H_ diff --git a/pyproject.toml b/pyproject.toml index a39c7cb362..c978ff7ca2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ optional-dependencies.qkeras = [ "tensorflow-model-optimization<=0.7.5", ] optional-dependencies.quartus-report = [ "calmjs-parse", "tabulate" ] +optional-dependencies.sparsepixels = [ "sparsepixels>=0.2.2" ] optional-dependencies.sr = [ "sympy>=1.13.1" ] optional-dependencies.testing = [ "calmjs-parse", diff --git a/test/pytest/test_sparsepixels.py b/test/pytest/test_sparsepixels.py new file mode 100644 index 0000000000..aaf92c4b85 --- /dev/null +++ b/test/pytest/test_sparsepixels.py @@ -0,0 +1,79 @@ +from pathlib import Path + +import keras +import numpy as np +import pytest + +sparsepixels = pytest.importorskip('sparsepixels') + +from hgq.config import LayerConfigScope, QuantizerConfigScope # noqa: E402 +from hgq.layers import QDense # noqa: E402 +from hgq.quantizer.config import QuantizerConfig # noqa: E402 +from keras.layers import Flatten # noqa: E402 +from sparsepixels.layers import AveragePooling2DSparse, InputReduce, QConv2DSparse # noqa: E402 + +import hls4ml # noqa: E402 + +test_root_path = Path(__file__).parent + + +def _build_sparse_cnn(input_shape=(8, 8, 1), n_max_pixels=4, threshold=0.4): + iq_conf = QuantizerConfig(place='datalane', q_type='kif', i0=4, f0=8, overflow_mode='WRAP') + with ( + QuantizerConfigScope(place='all', default_q_type='kbi', overflow_mode='SAT_SYM'), + 
def _make_sparse_inputs(n_samples, h=8, w=8, n_active_per_sample=4, threshold=0.4):
    """Build random single-channel images with a fixed number of active pixels.

    Every image starts as zeros; ``n_active_per_sample`` distinct pixel
    positions are then drawn and each is set to
    ``threshold + 0.1 + U * 0.5`` with ``U`` drawn from ``np.random.rand()``.
    Draws come from NumPy's global random state.

    Returns:
        ``float32`` array of shape ``(n_samples, h, w, 1)``.
    """
    images = np.zeros((n_samples, h, w, 1), dtype=np.float32)
    n_pixels = h * w
    for image in images:
        # `image` is a view, so assignments write straight into `images`.
        for flat_idx in np.random.choice(n_pixels, size=n_active_per_sample, replace=False):
            row, col = divmod(flat_idx, w)
            image[row, col, 0] = threshold + 0.1 + np.random.rand() * 0.5
    return images


@pytest.mark.parametrize('backend', ['Vivado', 'Vitis'])
def test_sparse_cnn(test_case_id, backend):
    """Convert the small sparse CNN with hls4ml and compare it to the Keras model.

    The test passes when the mean absolute difference between the Keras and
    HLS predictions on 1000 generated inputs stays below 0.5.
    """
    seed = 42
    np.random.seed(seed)
    keras.utils.set_random_seed(seed)

    keras_model = _build_sparse_cnn()
    inputs = _make_sparse_inputs(n_samples=1000)
    expected = keras_model.predict(inputs, verbose=0)

    config = hls4ml.utils.config_from_keras_model(keras_model, granularity='name', backend=backend)
    hls_model = hls4ml.converters.convert_from_keras_model(
        keras_model,
        hls_config=config,
        output_dir=str(test_root_path / test_case_id),
        backend=backend,
        io_type='io_parallel',
    )
    hls_model.compile()

    actual = hls_model.predict(inputs).reshape(expected.shape)

    mean_abs_diff = float(np.mean(np.abs(expected - actual)))
    print(f'sparse-pixels {backend}: mean|diff|={mean_abs_diff:.4f}')

    assert mean_abs_diff < 0.5