Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions hls4ml/backends/fpga/fpga_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,22 +129,33 @@ def __init__(self, name):
ConfigurableAttribute('skip', value_type=bool, default=False, description=descriptions.softmax_skip),
TypeAttribute(
'exp_table',
default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT),
default=FixedPrecisionType(
18, 8, signed=False, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT
),
description=descriptions.table_type,
),
TypeAttribute(
'inv_table',
default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT),
default=FixedPrecisionType(
18, 8, signed=False, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT
),
description=descriptions.table_type,
),
TypeAttribute(
'inv_inp',
default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT),
default=FixedPrecisionType(
18, 8, signed=False, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT
),
description='What the accumulated value is cast to before accessing the inversion table (only in stable)',
),
TypeAttribute(
'accum',
default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT),
'inp_norm',
default=FixedPrecisionType(
18, 8, signed=False, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT
),
description='The internal width used for the exp table lookup (only in stable)',
),
TypeAttribute('accum', description=descriptions.accum_type),
]
self.attribute_map[Softmax] = softmax_attrs

Expand Down
23 changes: 23 additions & 0 deletions hls4ml/model/optimizer/passes/infer_precision.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ def _infer_precision(self, node, types_to_infer):

if node_class in ['PReLU']:
return self._infer_prelu_act_precision(node, types_to_infer)

if node_class in ['Softmax']:
return self._infer_softmax_precision(node, types_to_infer)
# What about quantized activation layer? Setting it to 'auto' manually will break it here. We should prevent
# this in config_from_* functions

Expand Down Expand Up @@ -605,6 +608,26 @@ def _infer_prelu_act_precision(self, node, types_to_infer):

return inferred_types

def _infer_softmax_precision(self, node, types_to_infer):
    """Infer the accumulator precision of a softmax node.

    Only ``accum_t`` is handled here:

    * The table types have default settings, so they are never expected to be
      ``auto`` and are not inferred.
    * The result type is left to be set externally (model default if not set).
      It is likely the output value, in which case the output format would
      determine its precision.

    Args:
        node: The softmax node; its ``exp_table_t`` precision and ``n_in``
            attribute are read, and its ``accum_t`` precision is overwritten.
        types_to_infer: Names of the types that were requested to be inferred.

    Returns:
        ``['accum_t']`` if the accumulator was inferred, otherwise an empty list.
    """
    if 'accum_t' not in types_to_infer:
        return []

    table_precision = node.types['exp_table_t'].precision

    # Grow the exp table type by ceil(log2(n_in)) bits, added to both the total
    # width and the integer part; signedness is inherited from the table type.
    growth_bits = math.ceil(np.log2(node.get_attr('n_in')))
    node.types['accum_t'].precision = FixedPrecisionType(
        table_precision.width + growth_bits,
        table_precision.integer + growth_bits,
        signed=table_precision.signed,
    )

    return ['accum_t']
Comment thread
jmitrevs marked this conversation as resolved.


def _get_precision_from_constant(value: int | float, max_width=8):
"""A utility function to find a fixed type to store the constant
Expand Down
37 changes: 19 additions & 18 deletions hls4ml/templates/vivado/nnet_utils/nnet_activation.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,14 +189,13 @@ void softmax_latency(data_T data[CONFIG_T::n_slice], res_T res[CONFIG_T::n_slice
// Note we are exponentiating the inputs, which have type data_T
init_exp_table<data_T, CONFIG_T>(exp_table);
// Note we are inverting the summed exponentials, which have type accum_t
init_invert_table<typename CONFIG_T::inv_inp_t, CONFIG_T>(invert_table);
init_invert_table<typename CONFIG_T::accum_t, CONFIG_T>(invert_table);
initialized = true;
}

// Calculate all the e^x's
typename CONFIG_T::accum_t exp_res[CONFIG_T::n_slice];
#pragma HLS array_partition variable=exp_res complete
typename CONFIG_T::inv_inp_t exp_sum(0);
for (unsigned i = 0; i < CONFIG_T::n_slice; i++) {
#pragma HLS unroll
unsigned x = softmax_idx_from_real_val<data_T, CONFIG_T::exp_table_size>(data[i]);
Expand All @@ -206,10 +205,11 @@ void softmax_latency(data_T data[CONFIG_T::n_slice], res_T res[CONFIG_T::n_slice
// Explicitly sum the results with an adder tree.
// Rounding & Saturation mode, which improve accuracy, prevent Vivado from expression balancing
Op_add<typename CONFIG_T::accum_t> op_add;
exp_sum = reduce<typename CONFIG_T::accum_t, CONFIG_T::n_slice, Op_add<typename CONFIG_T::accum_t>>(exp_res, op_add);
typename CONFIG_T::accum_t exp_sum =
reduce<typename CONFIG_T::accum_t, CONFIG_T::n_slice, Op_add<typename CONFIG_T::accum_t>>(exp_res, op_add);

typename CONFIG_T::inv_table_t inv_exp_sum =
invert_table[softmax_idx_from_real_val<typename CONFIG_T::inv_inp_t, CONFIG_T::inv_table_size>(exp_sum)];
invert_table[softmax_idx_from_real_val<typename CONFIG_T::accum_t, CONFIG_T::inv_table_size>(exp_sum)];
for (unsigned i = 0; i < CONFIG_T::n_slice; i++) {
#pragma HLS unroll
res[i] = exp_res[i] * inv_exp_sum;
Expand Down Expand Up @@ -251,7 +251,6 @@ void softmax_stable(data_T data[CONFIG_T::n_slice], res_T res[CONFIG_T::n_slice]
// Calculate all the e^x's
typename CONFIG_T::accum_t exp_res[CONFIG_T::n_slice];
#pragma HLS array_partition variable=exp_res complete
typename CONFIG_T::inv_inp_t exp_sum(0);
for (unsigned i = 0; i < CONFIG_T::n_slice; i++) {
#pragma HLS unroll
unsigned x = softmax_idx_from_real_val<typename CONFIG_T::inp_norm_t, CONFIG_T::exp_table_size>(d_xi_xmax[i]);
Expand All @@ -261,7 +260,8 @@ void softmax_stable(data_T data[CONFIG_T::n_slice], res_T res[CONFIG_T::n_slice]
// Explicitly sum the results with an adder tree.
// Rounding & Saturation mode, which improve accuracy, prevent Vivado from expression balancing
Op_add<typename CONFIG_T::accum_t> op_add;
exp_sum = reduce<typename CONFIG_T::accum_t, CONFIG_T::n_slice, Op_add<typename CONFIG_T::accum_t>>(exp_res, op_add);
typename CONFIG_T::inv_inp_t exp_sum =
reduce<typename CONFIG_T::accum_t, CONFIG_T::n_slice, Op_add<typename CONFIG_T::accum_t>>(exp_res, op_add);

typename CONFIG_T::inv_table_t inv_exp_sum =
invert_table[softmax_idx_from_real_val<typename CONFIG_T::inv_inp_t, CONFIG_T::inv_table_size>(exp_sum)];
Expand All @@ -271,18 +271,18 @@ void softmax_stable(data_T data[CONFIG_T::n_slice], res_T res[CONFIG_T::n_slice]
}
}

template <typename CONFIG_T, int N_TABLE> void init_exp_table_legacy(typename CONFIG_T::table_t table_out[N_TABLE]) {
template <typename CONFIG_T, int N_TABLE> void init_exp_table_legacy(typename CONFIG_T::exp_table_t table_out[N_TABLE]) {
for (int ii = 0; ii < N_TABLE; ii++) {
// First, convert from table index to X-value (signed 8-bit, range -8 to +8)
float in_val = 2 * 8.0 * (ii - float(N_TABLE) / 2.0) / float(N_TABLE);
// Next, compute lookup table function
typename CONFIG_T::table_t real_val = exp_fcn_float(in_val);
typename CONFIG_T::exp_table_t real_val = exp_fcn_float(in_val);
// std::cout << "Lookup table In Value: " << in_val << " Result: " << real_val << std::endl;
table_out[ii] = real_val;
}
}

template <typename CONFIG_T, int N_TABLE> void init_invert_table_legacy(typename CONFIG_T::table_t table_out[N_TABLE]) {
template <typename CONFIG_T, int N_TABLE> void init_invert_table_legacy(typename CONFIG_T::inv_table_t table_out[N_TABLE]) {
// Inversion function:
// result = 1/x
for (int ii = 0; ii < N_TABLE; ii++) {
Expand All @@ -301,12 +301,12 @@ void softmax_legacy(data_T data[CONFIG_T::n_slice], res_T res[CONFIG_T::n_slice]
// Initialize the lookup table
#ifdef __HLS_SYN__
bool initialized = false;
typename CONFIG_T::table_t exp_table[CONFIG_T::exp_table_size];
typename CONFIG_T::table_t invert_table[CONFIG_T::inv_table_size];
typename CONFIG_T::exp_table_t exp_table[CONFIG_T::exp_table_size];
typename CONFIG_T::inv_table_t invert_table[CONFIG_T::inv_table_size];
#else
static bool initialized = false;
static typename CONFIG_T::table_t exp_table[CONFIG_T::exp_table_size];
static typename CONFIG_T::table_t invert_table[CONFIG_T::inv_table_size];
static typename CONFIG_T::exp_table_t exp_table[CONFIG_T::exp_table_size];
static typename CONFIG_T::inv_table_t invert_table[CONFIG_T::inv_table_size];
#endif
if (!initialized) {
init_exp_table_legacy<CONFIG_T, CONFIG_T::exp_table_size>(exp_table);
Expand All @@ -317,22 +317,23 @@ void softmax_legacy(data_T data[CONFIG_T::n_slice], res_T res[CONFIG_T::n_slice]
#pragma HLS PIPELINE

// Index into the lookup table based on data for exponentials
typename CONFIG_T::table_t exp_res[CONFIG_T::n_slice]; // different, independent, fixed point precision
typename CONFIG_T::table_t exp_diff_res; // different, independent, fixed point precision
typename CONFIG_T::accum_t exp_res[CONFIG_T::n_slice]; // different, independent, fixed point precision
typename CONFIG_T::exp_table_t exp_diff_res; // different, independent, fixed point precision
data_T data_cache[CONFIG_T::n_slice];
int data_round;
int index;

for (int ii = 0; ii < CONFIG_T::n_slice; ii++) {
data_cache[ii] = data[ii];
exp_res[ii] = 0;
}

// first calculate 1/softmax as a sum over fractions.
for (int ii = 0; ii < CONFIG_T::n_slice; ii++) {
for (int jj = 0; jj < CONFIG_T::n_slice; jj++) {
if (ii == jj)
exp_diff_res = 1;
else {
data_round = (data_cache[jj] - data_cache[ii]) * CONFIG_T::exp_table_size / 16;
auto data_round = (data_cache[jj] - data_cache[ii]) * CONFIG_T::exp_table_size / 16;
index = data_round + 8 * CONFIG_T::exp_table_size / 16;
if (index < 0)
index = 0;
Expand All @@ -352,7 +353,7 @@ void softmax_legacy(data_T data[CONFIG_T::n_slice], res_T res[CONFIG_T::n_slice]
if (exp_res_index > CONFIG_T::inv_table_size - 1)
exp_res_index = CONFIG_T::inv_table_size - 1;
// typename CONFIG_T::table_t exp_res_invert = invert_table[exp_res_index];
res[ii] = (res_T)invert_table[exp_res_index];
res[ii] = static_cast<res_T>(invert_table[exp_res_index]);
}
}

Expand Down
38 changes: 19 additions & 19 deletions hls4ml/templates/vivado/nnet_utils/nnet_activation_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ void softmax_latency(hls::stream<data_T> &data, hls::stream<res_T> &res) {
if (!initialized) {
// Note we are exponentiating the inputs, which have type data_T
init_exp_table<typename data_T::value_type, CONFIG_T>(exp_table);
// Note we are inverting the exponentials, which have type exp_table_t
init_invert_table<typename CONFIG_T::inv_inp_t, CONFIG_T>(invert_table);
// Note we are inverting the summed exponentials, which have type accum_t
init_invert_table<typename CONFIG_T::accum_t, CONFIG_T>(invert_table);
initialized = true;
}

Expand Down Expand Up @@ -150,7 +150,7 @@ void softmax_latency(hls::stream<data_T> &data, hls::stream<res_T> &res) {
exp_sum = reduce<typename CONFIG_T::accum_t, data_T::size, Op_add<typename CONFIG_T::accum_t>>(exp_res, op_add);

typename CONFIG_T::inv_table_t inv_exp_sum =
invert_table[softmax_idx_from_real_val<typename CONFIG_T::inv_inp_t, CONFIG_T::inv_table_size>(exp_sum)];
invert_table[softmax_idx_from_real_val<typename CONFIG_T::accum_t, CONFIG_T::inv_table_size>(exp_sum)];

res_T out_pack;
PRAGMA_DATA_PACK(out_pack)
Expand Down Expand Up @@ -216,7 +216,6 @@ void softmax_stable(hls::stream<data_T> &data, hls::stream<res_T> &res) {
// Calculate all the e^x's
typename CONFIG_T::accum_t exp_res[data_T::size];
#pragma HLS ARRAY_PARTITION variable=exp_res complete
typename CONFIG_T::inv_inp_t exp_sum(0);
for (unsigned j = 0; j < data_T::size; j++) {
#pragma HLS UNROLL
unsigned x = softmax_idx_from_real_val<typename CONFIG_T::inp_norm_t, CONFIG_T::exp_table_size>(d_xi_xmax[j]);
Expand All @@ -226,7 +225,8 @@ void softmax_stable(hls::stream<data_T> &data, hls::stream<res_T> &res) {
// Explicitly sum the results with an adder tree.
// Rounding & Saturation mode, which improve accuracy, prevent Vivado from expression balancing
Op_add<typename CONFIG_T::accum_t> op_add;
exp_sum = reduce<typename CONFIG_T::accum_t, data_T::size, Op_add<typename CONFIG_T::accum_t>>(exp_res, op_add);
typename CONFIG_T::inv_inp_t exp_sum =
reduce<typename CONFIG_T::accum_t, data_T::size, Op_add<typename CONFIG_T::accum_t>>(exp_res, op_add);

typename CONFIG_T::inv_table_t inv_exp_sum =
invert_table[softmax_idx_from_real_val<typename CONFIG_T::inv_inp_t, CONFIG_T::inv_table_size>(exp_sum)];
Expand All @@ -249,22 +249,22 @@ void softmax_legacy(hls::stream<data_T> &data, hls::stream<res_T> &res) {
// Initialize the lookup table
#ifdef __HLS_SYN__
bool initialized = false;
typename CONFIG_T::table_t exp_table[CONFIG_T::table_size];
typename CONFIG_T::table_t invert_table[CONFIG_T::table_size];
typename CONFIG_T::exp_table_t exp_table[CONFIG_T::exp_table_size];
typename CONFIG_T::inv_table_t invert_table[CONFIG_T::inv_table_size];
#else
static bool initialized = false;
static typename CONFIG_T::table_t exp_table[CONFIG_T::table_size];
static typename CONFIG_T::table_t invert_table[CONFIG_T::table_size];
static typename CONFIG_T::exp_table_t exp_table[CONFIG_T::exp_table_size];
static typename CONFIG_T::inv_table_t invert_table[CONFIG_T::inv_table_size];
#endif
if (!initialized) {
init_exp_table_legacy<CONFIG_T, CONFIG_T::table_size>(exp_table);
init_invert_table_legacy<CONFIG_T, CONFIG_T::table_size>(invert_table);
init_exp_table_legacy<CONFIG_T, CONFIG_T::exp_table_size>(exp_table);
init_invert_table_legacy<CONFIG_T, CONFIG_T::inv_table_size>(invert_table);
initialized = true;
}

// Index into the lookup table based on data for exponentials
typename CONFIG_T::table_t exp_res[data_T::size];
typename CONFIG_T::table_t exp_diff_res;
typename CONFIG_T::accum_t exp_res[data_T::size];
typename CONFIG_T::exp_table_t exp_diff_res;
typename data_T::value_type data_cache[data_T::size];

SoftmaxInitLoop:
Expand All @@ -288,12 +288,12 @@ void softmax_legacy(hls::stream<data_T> &data, hls::stream<res_T> &res) {
if (i == j) {
exp_diff_res = 1;
} else {
int data_round = (data_cache[j] - data_cache[i]) * CONFIG_T::table_size / 16;
auto data_round = (data_cache[j] - data_cache[i]) * CONFIG_T::table_size / 16;
int index = data_round + 8 * CONFIG_T::table_size / 16;
if (index < 0)
index = 0;
if (index > CONFIG_T::table_size - 1)
index = CONFIG_T::table_size - 1;
if (index > CONFIG_T::exp_table_size - 1)
index = CONFIG_T::exp_table_size - 1;
exp_diff_res = exp_table[index];
}

Expand All @@ -311,10 +311,10 @@ void softmax_legacy(hls::stream<data_T> &data, hls::stream<res_T> &res) {
int exp_res_index = exp_res[j] * CONFIG_T::table_size / 64;
if (exp_res_index < 0)
exp_res_index = 0;
if (exp_res_index > CONFIG_T::table_size - 1)
exp_res_index = CONFIG_T::table_size - 1;
if (exp_res_index > CONFIG_T::inv_table_size - 1)
exp_res_index = CONFIG_T::inv_table_size - 1;

out_pack[j] = (typename res_T::value_type)invert_table[exp_res_index];
out_pack[j] = static_cast<typename res_T::value_type>(invert_table[exp_res_index]);
}
res.write(out_pack);
}
Expand Down
37 changes: 37 additions & 0 deletions test/pytest/test_auto_precision.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import math
from pathlib import Path

import numpy as np
Expand All @@ -13,10 +14,12 @@
ReLU,
SeparableConv1D,
SeparableConv2D,
Softmax,
)
from tensorflow.keras.models import Sequential

import hls4ml
import hls4ml.model.layers
from hls4ml.model.optimizer.passes.infer_precision import _get_precision_from_constant

test_root_path = Path(__file__).parent
Expand Down Expand Up @@ -285,3 +288,37 @@ def test_precision_from_constant_unit(val, expected_width):
quantum = 2.0**-fp.fractional
if expected_width < max_width:
assert val % quantum == 0


@pytest.mark.parametrize('n_in', [4, 8, 16])
@pytest.mark.parametrize('backend', ['Vitis', 'oneAPI'])
def test_auto_precision_softmax(test_case_id, n_in, backend):
    """Check the inferred ``accum_t`` of a softmax layer against its ``exp_table_t``.

    A single-layer Keras softmax model is converted for each backend, and the
    accumulator type is expected to be the exp table type grown by
    ``ceil(log2(n_in))`` bits in both width and integer part, with the same
    signedness.
    """
    keras_model = Sequential()
    keras_model.add(Softmax(input_shape=(n_in,)))
    keras_model.compile()

    hls_config = hls4ml.utils.config_from_keras_model(keras_model, backend=backend, granularity='name')
    output_dir = str(test_root_path / test_case_id)
    hls_model = hls4ml.converters.convert_from_keras_model(
        keras_model, hls_config=hls_config, output_dir=output_dir, backend=backend
    )

    # Locate the first Softmax layer in the converted model
    softmax_layer = None
    for layer in hls_model.get_layers():
        if isinstance(layer, hls4ml.model.layers.Softmax):
            softmax_layer = layer
            break
    assert softmax_layer is not None, 'No Softmax layer found in converted model'

    accum_t = softmax_layer.types['accum_t'].precision
    exp_table_t = softmax_layer.types['exp_table_t'].precision

    # Expected growth relative to the exp table type
    ceillog = math.ceil(math.log2(n_in))
    expected_width = ceillog + exp_table_t.width
    expected_integer = ceillog + exp_table_t.integer
    expected_signed = exp_table_t.signed

    assert accum_t.width == expected_width, f'Expected accum_t width {expected_width}, got {accum_t.width} (n_in={n_in})'
    assert accum_t.integer == expected_integer, (
        f'Expected accum_t integer {expected_integer}, got {accum_t.integer} (n_in={n_in})'
    )
    assert accum_t.signed == expected_signed, (
        f'Expected accum_t signed={expected_signed}, got {accum_t.signed} (n_in={n_in})'
    )
Loading