diff --git a/hls4ml/backends/vivado/passes/distributed_arithmetic.py b/hls4ml/backends/vivado/passes/distributed_arithmetic.py index d8672104de..3d576f12e5 100644 --- a/hls4ml/backends/vivado/passes/distributed_arithmetic.py +++ b/hls4ml/backends/vivado/passes/distributed_arithmetic.py @@ -416,6 +416,7 @@ def match(self, node): def transform(self, model: 'ModelGraph', node: DACombinational): from da4ml.codegen.hls import hls_logic_and_bridge_gen + from da4ml.codegen.hls.hls_codegen import get_io_types from da4ml.trace import FixedVariableArrayInput, comb_trace io_type = model.config.get_config_value('IOType') @@ -444,16 +445,44 @@ def transform(self, model: 'ModelGraph', node: DACombinational): raise ValueError(f'Unsupported backend {backend} for DACombinational layer.') fn_name = f'da_comblogic_{node.index}' - comb_logic, _ = hls_logic_and_bridge_gen( - comb, fn_name, flavor=flavor, pragmas=['#pragma HLS INLINE'], print_latency=True - ) - namespace = model.config.get_writer_config().get('Namespace', None) or 'nnet' - inp_t: str = node.get_input_variable().type.name out_t: str = node.get_output_variable().type.name - inp_name: str = node.get_input_variable().name + inp_name: str = ', '.join(node.inputs) out_name: str = node.get_output_variable().name + namespace = model.config.get_writer_config().get('Namespace', None) or 'nnet' + + inp_names = node.inputs + inp_types = [model.graph[name].get_output_variable().type.name for name in inp_names] + inp_ts = ', '.join(inp_types) + + fn_name_internal = f'{fn_name}_internal' if len(inp_names) > 1 else fn_name + _name = fn_name_internal if len(inp_names) > 1 else fn_name + comb_logic, _ = hls_logic_and_bridge_gen( + comb, _name, flavor=flavor, pragmas=['#pragma HLS INLINE'], print_latency=True + ) - fn_cpp = f'{namespace}::{fn_name}<{inp_t}, {out_t}>({inp_name}, {out_name});' + # When there's multiple inputs, make a wrapper doing the concatenation and rename the original fn. + if len(inp_names) > 1: + inp_sizes = [prod(model.graph[name].get_output_variable().shape) for name in inp_names] + template_args = ', '.join(f'typename inp{i}_t' for i in range(len(inp_names))) + ', typename out_t' + fn_args = ', '.join(f'inp{i}_t inp{i}[{s}]' for i, s in enumerate(inp_sizes)) + f', out_t out[{comb.shape[1]}]' + _inp_t_str = get_io_types(comb, 'vitis')[0] + forloop = """for (size_t i = {n}; i < {m}; i++) {{ + inp_buf[i] = inp{i}[i-{n}];}} + #pragma HLS UNROLL""" + N = [0] + np.cumsum(inp_sizes).tolist() + forloops = '\n '.join(forloop.format(i=i, n=N[i], m=N[i + 1]) for i in range(len(inp_names))) + wrapper_fn = f"""template <{template_args}> +void {fn_name}({fn_args}) {{ + {_inp_t_str} inp_buf[{comb.shape[0]}]; + #pragma HLS INLINE + + {forloops} + + {fn_name_internal}<{_inp_t_str}, out_t>(inp_buf, out); + }}""" + comb_logic = comb_logic + '\n\n' + wrapper_fn + + fn_cpp = f'{namespace}::{fn_name}<{inp_ts}, {out_t}>({inp_name}, {out_name});' node.attributes['da_codegen'] = Source(comb_logic) node.attributes['function_cpp'] = fn_cpp diff --git a/hls4ml/converters/keras_v3/hgq2/multi_head_attention.py b/hls4ml/converters/keras_v3/hgq2/multi_head_attention.py index 09723f5336..e66a1d8579 100644 --- a/hls4ml/converters/keras_v3/hgq2/multi_head_attention.py +++ b/hls4ml/converters/keras_v3/hgq2/multi_head_attention.py @@ -18,7 +18,7 @@ class QMultiHeadAttentionHandler(QLayerHandler): def handle( self, - layer: 'hgq.layers.QMultiHeadAttention', + layer: 'hgq.layers.attn.mha.QMultiHeadAttention', in_tensors: Sequence['KerasTensor'], out_tensors: Sequence['KerasTensor'], ): @@ -131,7 +131,7 @@ class QLinformerAttentionHandler(QMultiHeadAttentionHandler): def handle( self, - layer: 'hgq.layers.linformer_attention.QLinformerAttention', + layer: 'hgq.layers.attn.linformer.QLinformerAttention', in_tensors: Sequence['KerasTensor'], out_tensors: Sequence['KerasTensor'], ): diff --git a/hls4ml/converters/keras_v3_to_hls.py b/hls4ml/converters/keras_v3_to_hls.py index 359bc391d6..bcfac3b5ea 100644 --- a/hls4ml/converters/keras_v3_to_hls.py +++ b/hls4ml/converters/keras_v3_to_hls.py @@ -180,7 +180,7 @@ def fallback_handler( if self.allow_da_fallback: try: ret = self.da_call(layer, inp_tensors, out_tensors) - print(f'DA handler used for layer {layer.name}') + print(f'DA handler used for layer {layer.name} ({layer.__class__.__module__}.{layer.__class__.__name__}).') return ret except KeyError: pass # missing DA handler @@ -208,9 +208,12 @@ def da_call( raise ValueError(f'DA combinational requires n_out=1, got {n_out=} for layer {layer.name} ({cls_name}).') input_shapes: list[list[int]] = [list(t.shape[1:]) for t in inp_tensors] # type: ignore - inp = tuple(FixedVariableArrayInput(tuple(shape)).quantize(1, 32, 32) for shape in input_shapes) _model = keras.Model(inp_tensors, out_tensors) - inp, out = trace_model(_model, inputs=inp) + try: + inp, out = trace_model(_model) # When input bw can be determined automatically + except (AssertionError, ValueError): + inp = tuple(FixedVariableArrayInput(tuple(shape)).quantize(1, 32, 32) for shape in input_shapes) + inp, out = trace_model(_model, inputs=inp) comb = comb_trace(inp, out) input_names = [t.name for t in inp_tensors] output_names = [t.name for t in out_tensors] diff --git a/hls4ml/model/optimizer/passes/bit_exact.py b/hls4ml/model/optimizer/passes/bit_exact.py index 88dc65c806..0fc11e0b7a 100644 --- a/hls4ml/model/optimizer/passes/bit_exact.py +++ b/hls4ml/model/optimizer/passes/bit_exact.py @@ -199,9 +199,17 @@ def _(layer: Transpose): @_request_kif.register def _(layer: DACombinational): - comb = layer.attributes['da_comb_trace'] + comb = layer.attributes['da_comb_logic'] k, i, f = comb.inp_kifs - return k.astype(np.int16), i.astype(np.int16), f.astype(np.int16) + inp_shapes = get_input_shapes(layer) + kk, ii, ff = [], [], [] + bias = 0 + for shape in inp_shapes: + size = prod(shape) + kk.append(k[bias : bias + size].reshape(shape).astype(np.int16)) + ii.append(i[bias : bias + size].reshape(shape).astype(np.int16)) + ff.append(f[bias : bias + size].reshape(shape).astype(np.int16)) + return tuple(zip(kk, ii, ff)) def requested_kif(layer: Layer) -> KIF_t: @@ -648,12 +656,15 @@ def _(layer: UnaryLUT): def _(layer: DACombinational): from da4ml.trace import FixedVariableArray, comb_trace - k_in, i_in, f_in = get_input_kifs(layer)[0] - inp = FixedVariableArray.from_kif(k_in, i_in, f_in) - out = layer.attributes['da_comb_logic'](inp) + kifs = [np.array(kif).reshape(3, -1) for kif in get_input_kifs(layer)] + kif = np.concatenate(kifs, axis=1) + inp = FixedVariableArray.from_kif(*kif) + out = layer.attributes['da_comb_logic'](inp.ravel()) comb = comb_trace(inp, out) k, i, f = comb.out_kifs - return k.astype(np.int16), i.astype(np.int16), f.astype(np.int16) + shape = get_output_shape(layer) + + return (k.astype(np.int16).reshape(shape), i.astype(np.int16).reshape(shape), f.astype(np.int16).reshape(shape)) @_produce_kif.register diff --git a/pyproject.toml b/pyproject.toml index a39c7cb362..7f58043f2d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ ] dynamic = [ "version" ] dependencies = [ "h5py", "numpy", "pydigitalwavetools==1.1", "pyyaml", "quantizers" ] -optional-dependencies.da = [ "da4ml>=0.5.2,<0.6" ] +optional-dependencies.da = [ "da4ml>=0.6,<0.7" ] optional-dependencies.doc = [ "sphinx", "sphinx-contributors", @@ -35,7 +35,7 @@ optional-dependencies.doc = [ "sphinx-tabs", ] optional-dependencies.hgq = [ "hgq>=0.2.3" ] -optional-dependencies.hgq2 = [ "hgq2>=0.1.7" ] +optional-dependencies.hgq2 = [ "hgq2>=0.1.8" ] optional-dependencies.keras-v3 = [ "keras>=3.10" ] optional-dependencies.onnx = [ "onnx>=1.4" ] optional-dependencies.optimization = [ @@ -67,8 +67,8 @@ optional-dependencies.testing-keras2 = [ "tensorflow>=2.8,<=2.14.1", ] optional-dependencies.testing-keras3 = [ - "da4ml", - "hgq2>=0.1.7", + "da4ml>=0.6,<0.7", + "hgq2>=0.1.8", "keras>=3.10", "tensorflow>=2.15", ] diff --git a/test/pytest/test_hgq2_mha.py b/test/pytest/test_hgq2_mha.py index 6d8b72f4da..380c345278 100644 --- a/test/pytest/test_hgq2_mha.py +++ b/test/pytest/test_hgq2_mha.py @@ -12,9 +12,6 @@ from hls4ml.converters import convert_from_keras_model -# Current hgq2 release rejects the parallelization_factor kwarg that hls4ml passes; skip until supported. -pytest.skip('Skip until hgq2 supports parallelization_factor in QEinsumDense', allow_module_level=True) - test_path = Path(__file__).parent