diff --git a/hls4ml/backends/fpga/fpga_backend.py b/hls4ml/backends/fpga/fpga_backend.py index db0f256172..1518f1a04b 100644 --- a/hls4ml/backends/fpga/fpga_backend.py +++ b/hls4ml/backends/fpga/fpga_backend.py @@ -129,22 +129,33 @@ def __init__(self, name): ConfigurableAttribute('skip', value_type=bool, default=False, description=descriptions.softmax_skip), TypeAttribute( 'exp_table', - default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT), + default=FixedPrecisionType( + 18, 8, signed=False, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT + ), description=descriptions.table_type, ), TypeAttribute( 'inv_table', - default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT), + default=FixedPrecisionType( + 18, 8, signed=False, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT + ), description=descriptions.table_type, ), TypeAttribute( 'inv_inp', - default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT), + default=FixedPrecisionType( + 18, 8, signed=False, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT + ), + description='What the accumulated value is cast to before accessing the inversion table (only in stable)', ), TypeAttribute( - 'accum', - default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT), + 'inp_norm', + default=FixedPrecisionType( + 18, 8, signed=False, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT + ), + description='The internal width used for the exp table lookup (only in stable)', ), + TypeAttribute('accum', description=descriptions.accum_type), ] self.attribute_map[Softmax] = softmax_attrs diff --git a/hls4ml/model/optimizer/passes/infer_precision.py b/hls4ml/model/optimizer/passes/infer_precision.py index 0bc29a2955..189ffb7dda 100644 --- a/hls4ml/model/optimizer/passes/infer_precision.py +++ 
b/hls4ml/model/optimizer/passes/infer_precision.py @@ -90,6 +90,9 @@ def _infer_precision(self, node, types_to_infer): if node_class in ['PReLU']: return self._infer_prelu_act_precision(node, types_to_infer) + + if node_class in ['Softmax']: + return self._infer_softmax_precision(node, types_to_infer) # What about quantized activation layer? Setting it to 'auto' manually will break it here. We should prevent # this in config_from_* functions @@ -605,6 +608,26 @@ def _infer_prelu_act_precision(self, node, types_to_infer): return inferred_types + def _infer_softmax_precision(self, node, types_to_infer): + inferred_types = [] + + # for softmax, the table parameters have a default setting, so they don't need to be inferred + # here. We never expect them to be of type auto. + + # For result, we leave it to be set externally (model default if not set). We expect it to + # likely be the output value, in which case the output format would determine its precision. + # Therefore, only the accum is configured here + + if 'accum_t' in types_to_infer: + exp_w = node.types['exp_table_t'].precision.width + exp_i = node.types['exp_table_t'].precision.integer + exp_s = node.types['exp_table_t'].precision.signed + ceillog = math.ceil(np.log2(node.get_attr('n_in'))) + node.types['accum_t'].precision = FixedPrecisionType(exp_w + ceillog, exp_i + ceillog, signed=exp_s) + inferred_types.append('accum_t') + + return inferred_types + def _get_precision_from_constant(value: int | float, max_width=8): """A utility function to find a fixed type to store the constant diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_activation.h b/hls4ml/templates/vivado/nnet_utils/nnet_activation.h index ac85e0b2cc..b262c86859 100644 --- a/hls4ml/templates/vivado/nnet_utils/nnet_activation.h +++ b/hls4ml/templates/vivado/nnet_utils/nnet_activation.h @@ -189,14 +189,13 @@ void softmax_latency(data_T data[CONFIG_T::n_slice], res_T res[CONFIG_T::n_slice // Note we are exponentiating the inputs, which have 
type data_T init_exp_table(exp_table); // Note we are inverting the exponentials, which have type exp_table_t - init_invert_table(invert_table); + init_invert_table(invert_table); initialized = true; } // Calculate all the e^x's typename CONFIG_T::accum_t exp_res[CONFIG_T::n_slice]; #pragma HLS array_partition variable=exp_res complete - typename CONFIG_T::inv_inp_t exp_sum(0); for (unsigned i = 0; i < CONFIG_T::n_slice; i++) { #pragma HLS unroll unsigned x = softmax_idx_from_real_val(data[i]); @@ -206,10 +205,11 @@ void softmax_latency(data_T data[CONFIG_T::n_slice], res_T res[CONFIG_T::n_slice // Explicitly sum the results with an adder tree. // Rounding & Saturation mode, which improve accuracy, prevent Vivado from expression balancing Op_add op_add; - exp_sum = reduce>(exp_res, op_add); + typename CONFIG_T::accum_t exp_sum = + reduce>(exp_res, op_add); typename CONFIG_T::inv_table_t inv_exp_sum = - invert_table[softmax_idx_from_real_val(exp_sum)]; + invert_table[softmax_idx_from_real_val(exp_sum)]; for (unsigned i = 0; i < CONFIG_T::n_slice; i++) { #pragma HLS unroll res[i] = exp_res[i] * inv_exp_sum; @@ -251,7 +251,6 @@ void softmax_stable(data_T data[CONFIG_T::n_slice], res_T res[CONFIG_T::n_slice] // Calculate all the e^x's typename CONFIG_T::accum_t exp_res[CONFIG_T::n_slice]; #pragma HLS array_partition variable=exp_res complete - typename CONFIG_T::inv_inp_t exp_sum(0); for (unsigned i = 0; i < CONFIG_T::n_slice; i++) { #pragma HLS unroll unsigned x = softmax_idx_from_real_val(d_xi_xmax[i]); @@ -261,7 +260,8 @@ void softmax_stable(data_T data[CONFIG_T::n_slice], res_T res[CONFIG_T::n_slice] // Explicitly sum the results with an adder tree. 
// Rounding & Saturation mode, which improve accuracy, prevent Vivado from expression balancing Op_add op_add; - exp_sum = reduce>(exp_res, op_add); + typename CONFIG_T::inv_inp_t exp_sum = + reduce>(exp_res, op_add); typename CONFIG_T::inv_table_t inv_exp_sum = invert_table[softmax_idx_from_real_val(exp_sum)]; @@ -271,18 +271,18 @@ void softmax_stable(data_T data[CONFIG_T::n_slice], res_T res[CONFIG_T::n_slice] } } -template void init_exp_table_legacy(typename CONFIG_T::table_t table_out[N_TABLE]) { +template void init_exp_table_legacy(typename CONFIG_T::exp_table_t table_out[N_TABLE]) { for (int ii = 0; ii < N_TABLE; ii++) { // First, convert from table index to X-value (signed 8-bit, range -8 to +8) float in_val = 2 * 8.0 * (ii - float(N_TABLE) / 2.0) / float(N_TABLE); // Next, compute lookup table function - typename CONFIG_T::table_t real_val = exp_fcn_float(in_val); + typename CONFIG_T::exp_table_t real_val = exp_fcn_float(in_val); // std::cout << "Lookup table In Value: " << in_val << " Result: " << real_val << std::endl; table_out[ii] = real_val; } } -template void init_invert_table_legacy(typename CONFIG_T::table_t table_out[N_TABLE]) { +template void init_invert_table_legacy(typename CONFIG_T::inv_table_t table_out[N_TABLE]) { // Inversion function: // result = 1/x for (int ii = 0; ii < N_TABLE; ii++) { @@ -301,12 +301,12 @@ void softmax_legacy(data_T data[CONFIG_T::n_slice], res_T res[CONFIG_T::n_slice] // Initialize the lookup table #ifdef __HLS_SYN__ bool initialized = false; - typename CONFIG_T::table_t exp_table[CONFIG_T::exp_table_size]; - typename CONFIG_T::table_t invert_table[CONFIG_T::inv_table_size]; + typename CONFIG_T::exp_table_t exp_table[CONFIG_T::exp_table_size]; + typename CONFIG_T::inv_table_t invert_table[CONFIG_T::inv_table_size]; #else static bool initialized = false; - static typename CONFIG_T::table_t exp_table[CONFIG_T::exp_table_size]; - static typename CONFIG_T::table_t invert_table[CONFIG_T::inv_table_size]; + static typename 
CONFIG_T::exp_table_t exp_table[CONFIG_T::exp_table_size]; + static typename CONFIG_T::inv_table_t invert_table[CONFIG_T::inv_table_size]; #endif if (!initialized) { init_exp_table_legacy(exp_table); @@ -317,22 +317,23 @@ void softmax_legacy(data_T data[CONFIG_T::n_slice], res_T res[CONFIG_T::n_slice] #pragma HLS PIPELINE // Index into the lookup table based on data for exponentials - typename CONFIG_T::table_t exp_res[CONFIG_T::n_slice]; // different, independent, fixed point precision - typename CONFIG_T::table_t exp_diff_res; // different, independent, fixed point precision + typename CONFIG_T::accum_t exp_res[CONFIG_T::n_slice]; // different, independent, fixed point precision + typename CONFIG_T::exp_table_t exp_diff_res; // different, independent, fixed point precision data_T data_cache[CONFIG_T::n_slice]; - int data_round; int index; + for (int ii = 0; ii < CONFIG_T::n_slice; ii++) { data_cache[ii] = data[ii]; exp_res[ii] = 0; } + // first calculate 1/softmax as a sum over fractions. 
for (int ii = 0; ii < CONFIG_T::n_slice; ii++) { for (int jj = 0; jj < CONFIG_T::n_slice; jj++) { if (ii == jj) exp_diff_res = 1; else { - data_round = (data_cache[jj] - data_cache[ii]) * CONFIG_T::exp_table_size / 16; + auto data_round = (data_cache[jj] - data_cache[ii]) * CONFIG_T::exp_table_size / 16; index = data_round + 8 * CONFIG_T::exp_table_size / 16; if (index < 0) index = 0; @@ -352,7 +353,7 @@ void softmax_legacy(data_T data[CONFIG_T::n_slice], res_T res[CONFIG_T::n_slice] if (exp_res_index > CONFIG_T::inv_table_size - 1) exp_res_index = CONFIG_T::inv_table_size - 1; // typename CONFIG_T::table_t exp_res_invert = invert_table[exp_res_index]; - res[ii] = (res_T)invert_table[exp_res_index]; + res[ii] = static_cast(invert_table[exp_res_index]); } } diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_activation_stream.h b/hls4ml/templates/vivado/nnet_utils/nnet_activation_stream.h index 50c6c4068c..814ed2f50a 100644 --- a/hls4ml/templates/vivado/nnet_utils/nnet_activation_stream.h +++ b/hls4ml/templates/vivado/nnet_utils/nnet_activation_stream.h @@ -120,8 +120,8 @@ void softmax_latency(hls::stream &data, hls::stream &res) { if (!initialized) { // Note we are exponentiating the inputs, which have type data_T init_exp_table(exp_table); - // Note we are inverting the exponentials, which have type exp_table_t - init_invert_table(invert_table); + // Note we are inverting the summed exponentials, which have type accum_t + init_invert_table(invert_table); initialized = true; } @@ -150,7 +150,7 @@ void softmax_latency(hls::stream &data, hls::stream &res) { exp_sum = reduce>(exp_res, op_add); typename CONFIG_T::inv_table_t inv_exp_sum = - invert_table[softmax_idx_from_real_val(exp_sum)]; + invert_table[softmax_idx_from_real_val(exp_sum)]; res_T out_pack; PRAGMA_DATA_PACK(out_pack) @@ -216,7 +216,6 @@ void softmax_stable(hls::stream &data, hls::stream &res) { // Calculate all the e^x's typename CONFIG_T::accum_t exp_res[data_T::size]; #pragma HLS ARRAY_PARTITION 
variable=exp_res complete - typename CONFIG_T::inv_inp_t exp_sum(0); for (unsigned j = 0; j < data_T::size; j++) { #pragma HLS UNROLL unsigned x = softmax_idx_from_real_val(d_xi_xmax[j]); @@ -226,7 +225,8 @@ void softmax_stable(hls::stream &data, hls::stream &res) { // Explicitly sum the results with an adder tree. // Rounding & Saturation mode, which improve accuracy, prevent Vivado from expression balancing Op_add op_add; - exp_sum = reduce>(exp_res, op_add); + typename CONFIG_T::inv_inp_t exp_sum = + reduce>(exp_res, op_add); typename CONFIG_T::inv_table_t inv_exp_sum = invert_table[softmax_idx_from_real_val(exp_sum)]; @@ -249,22 +249,22 @@ void softmax_legacy(hls::stream &data, hls::stream &res) { // Initialize the lookup table #ifdef __HLS_SYN__ bool initialized = false; - typename CONFIG_T::table_t exp_table[CONFIG_T::table_size]; - typename CONFIG_T::table_t invert_table[CONFIG_T::table_size]; + typename CONFIG_T::exp_table_t exp_table[CONFIG_T::exp_table_size]; + typename CONFIG_T::inv_table_t invert_table[CONFIG_T::inv_table_size]; #else static bool initialized = false; - static typename CONFIG_T::table_t exp_table[CONFIG_T::table_size]; - static typename CONFIG_T::table_t invert_table[CONFIG_T::table_size]; + static typename CONFIG_T::exp_table_t exp_table[CONFIG_T::exp_table_size]; + static typename CONFIG_T::inv_table_t invert_table[CONFIG_T::inv_table_size]; #endif if (!initialized) { - init_exp_table_legacy(exp_table); - init_invert_table_legacy(invert_table); + init_exp_table_legacy(exp_table); + init_invert_table_legacy(invert_table); initialized = true; } // Index into the lookup table based on data for exponentials - typename CONFIG_T::table_t exp_res[data_T::size]; - typename CONFIG_T::table_t exp_diff_res; + typename CONFIG_T::accum_t exp_res[data_T::size]; + typename CONFIG_T::exp_table_t exp_diff_res; typename data_T::value_type data_cache[data_T::size]; SoftmaxInitLoop: @@ -288,12 +288,12 @@ void softmax_legacy(hls::stream &data, hls::stream 
&res) { if (i == j) { exp_diff_res = 1; } else { - int data_round = (data_cache[j] - data_cache[i]) * CONFIG_T::table_size / 16; + auto data_round = (data_cache[j] - data_cache[i]) * CONFIG_T::table_size / 16; int index = data_round + 8 * CONFIG_T::table_size / 16; if (index < 0) index = 0; - if (index > CONFIG_T::table_size - 1) - index = CONFIG_T::table_size - 1; + if (index > CONFIG_T::exp_table_size - 1) + index = CONFIG_T::exp_table_size - 1; exp_diff_res = exp_table[index]; } @@ -311,10 +311,10 @@ void softmax_legacy(hls::stream &data, hls::stream &res) { int exp_res_index = exp_res[j] * CONFIG_T::table_size / 64; if (exp_res_index < 0) exp_res_index = 0; - if (exp_res_index > CONFIG_T::table_size - 1) - exp_res_index = CONFIG_T::table_size - 1; + if (exp_res_index > CONFIG_T::inv_table_size - 1) + exp_res_index = CONFIG_T::inv_table_size - 1; - out_pack[j] = (typename res_T::value_type)invert_table[exp_res_index]; + out_pack[j] = static_cast(invert_table[exp_res_index]); } res.write(out_pack); } diff --git a/test/pytest/test_auto_precision.py b/test/pytest/test_auto_precision.py index d3738c8461..57ea268340 100644 --- a/test/pytest/test_auto_precision.py +++ b/test/pytest/test_auto_precision.py @@ -1,3 +1,4 @@ +import math from pathlib import Path import numpy as np @@ -13,10 +14,12 @@ ReLU, SeparableConv1D, SeparableConv2D, + Softmax, ) from tensorflow.keras.models import Sequential import hls4ml +import hls4ml.model.layers from hls4ml.model.optimizer.passes.infer_precision import _get_precision_from_constant test_root_path = Path(__file__).parent @@ -285,3 +288,37 @@ def test_precision_from_constant_unit(val, expected_width): quantum = 2.0**-fp.fractional if expected_width < max_width: assert val % quantum == 0 + + +@pytest.mark.parametrize('n_in', [4, 8, 16]) +@pytest.mark.parametrize('backend', ['Vitis', 'oneAPI']) +def test_auto_precision_softmax(test_case_id, n_in, backend): + """Test that auto accumulator precision is correctly inferred for softmax 
layers.""" + model = Sequential() + model.add(Softmax(input_shape=(n_in,))) + model.compile() + + config = hls4ml.utils.config_from_keras_model(model, backend=backend, granularity='name') + + odir = str(test_root_path / test_case_id) + hls_model = hls4ml.converters.convert_from_keras_model(model, hls_config=config, output_dir=odir, backend=backend) + + # Find the Softmax layer and verify accum_t precision + softmax_layer = next((layer for layer in hls_model.get_layers() if isinstance(layer, hls4ml.model.layers.Softmax)), None) + assert softmax_layer is not None, 'No Softmax layer found in converted model' + + accum_t = softmax_layer.types['accum_t'].precision + exp_table_t = softmax_layer.types['exp_table_t'].precision + + ceillog = math.ceil(math.log2(n_in)) + expected_width = exp_table_t.width + ceillog + expected_integer = exp_table_t.integer + ceillog + expected_signed = exp_table_t.signed + + assert accum_t.width == expected_width, f'Expected accum_t width {expected_width}, got {accum_t.width} (n_in={n_in})' + assert accum_t.integer == expected_integer, ( + f'Expected accum_t integer {expected_integer}, got {accum_t.integer} (n_in={n_in})' + ) + assert accum_t.signed == expected_signed, ( + f'Expected accum_t signed={expected_signed}, got {accum_t.signed} (n_in={n_in})' + )