HGQ QMHA and QLinformer support for oneAPI #1432

base: main
```diff
@@ -100,15 +100,8 @@ template <class data_T, class res_T, typename CONFIG_T> void sigmoid(const data_
 enum class softmax_implementation { latency = 0, legacy = 1, stable = 2, argmax = 3 };

 template <class data_T, typename CONFIG_T> inline unsigned softmax_stable_idx_from_real_val(const data_T x) {
-    // Number of address bits for table
-    static constexpr int N = ceillog2<CONFIG_T::table_size>::val;
-
-    // Slice the top N bits of the input
-    [[intel::fpga_register]] ac_int<N, false> y = x.template slc<N>(x.width - N - 1);
-    // If x is the most negative value, the slice will be 0, so we need to set the 0-th bit to ensure correctness
-    if (x != 0 && y == 0)
-        y[0] = 1;
-    return y.to_uint();
+    // Extract the lower 'width' bits of x
+    return x.template slc<data_T::width>(0).to_uint();
 }

 template <class data_T, typename CONFIG_T> inline unsigned softmax_latency_idx_from_real_val(const data_T x) {
```

Contributor: This is a logic change. Before, it took the upper bits. Does it now assume that the table size is always the same as the width of the value? If so, I guess it must be reinforced somewhere. Are we sure we want this?

Contributor: I see the logic is unchanged for Vivado and Libero, so I am hesitant to have this change.
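To make the reviewers' concern concrete, here is a rough Python model of the two indexing schemes (hypothetical helper names, not code from this PR). The old scheme addresses a table of 2**N entries using the N bits just below the sign bit; the new scheme uses the input's full raw bit pattern, which only stays in range when the table size equals 2**width of the input type:

```python
# Hypothetical bit-level models of the two index computations, assuming the
# input is an unsigned view of a fixed-point value's two's-complement bits.
def old_idx(raw_bits: int, width: int, table_size: int) -> int:
    """Old scheme: slice the N bits just below the sign bit."""
    n = (table_size - 1).bit_length()  # ceillog2(table_size)
    y = (raw_bits >> (width - n - 1)) & ((1 << n) - 1)
    if raw_bits != 0 and y == 0:
        y |= 1  # guard: the most negative value maps to index 1, not 0
    return y

def new_idx(raw_bits: int, width: int) -> int:
    """New scheme: the index is the input's full raw bit pattern."""
    return raw_bits & ((1 << width) - 1)
```

If `table_size != 2**width`, `new_idx` can exceed the table bounds, which is presumably why the reviewers ask where that assumption is enforced.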
|
|
```diff
@@ -121,7 +114,6 @@ template <class data_T, typename CONFIG_T> inline unsigned softmax_latency_idx_f
 }

 template <class data_T, class res_T, typename CONFIG_T> void softmax_stable(const data_T &data, res_T &res) {
     // Look-up tables
     #include "activation_tables/exp_table.tb"
     #include "activation_tables/invert_table.tb"
```
```diff
@@ -130,29 +122,34 @@ template <class data_T, class res_T, typename CONFIG_T> void softmax_stable(cons
     [[intel::fpga_register]] auto x_max =
         reduce<typename data_T::value_type, CONFIG_T::n_in, Op_max<typename data_T::value_type>>(data.data(), op_max);

-    // For the diffs, use the same type as the input but force rounding and saturation
-    [[intel::fpga_register]] ac_fixed<data_T::value_type::width, data_T::value_type::i_width, true, AC_RND, AC_SAT>
-        d_xi_xmax[CONFIG_T::n_in];
+    // Normalize inputs: d = x_max - x
+    [[intel::fpga_register]] typename CONFIG_T::inp_norm_t d_xi_xmax[CONFIG_T::n_in];
 #pragma unroll
     for (unsigned i = 0; i < CONFIG_T::n_in; i++) {
-        d_xi_xmax[i] = data[i] - x_max;
+        // HGQ stable: d = x_max - data
+        d_xi_xmax[i] = x_max - data[i];
     }

-    // Calculate all the e^x's
+    // Exponentials
     [[intel::fpga_register]] typename CONFIG_T::exp_table_t exp_res[CONFIG_T::n_in];
 #pragma unroll
     for (unsigned i = 0; i < CONFIG_T::n_in; i++) {
-        exp_res[i] = exp_table[softmax_stable_idx_from_real_val<typename data_T::value_type, CONFIG_T>(d_xi_xmax[i])];
+        unsigned idx = softmax_stable_idx_from_real_val<typename CONFIG_T::inp_norm_t, CONFIG_T>(d_xi_xmax[i]);
+        exp_res[i] = exp_table[idx];
     }

-    // Explicitly sum previously calculated exponentials with an adder tree
-    Op_add<typename CONFIG_T::exp_table_t> op_add;
-    [[intel::fpga_register]] typename CONFIG_T::exp_table_t exp_sum =
-        reduce<typename CONFIG_T::exp_table_t, CONFIG_T::n_in, Op_add<typename CONFIG_T::exp_table_t>>(exp_res, op_add);
+    // Sum of Exponentials
+    Op_add<typename CONFIG_T::accum_t> op_add;
+    [[intel::fpga_register]] typename CONFIG_T::accum_t exp_sum =
+        reduce<typename CONFIG_T::exp_table_t, CONFIG_T::n_in, Op_add<typename CONFIG_T::accum_t>>(exp_res, op_add);

-    // Multiply previously calculated exponetials with the reciprocal of the sum
-    [[intel::fpga_register]] typename CONFIG_T::inv_table_t inv_exp_sum =
-        invert_table[softmax_stable_idx_from_real_val<typename CONFIG_T::exp_table_t, CONFIG_T>(exp_sum)];
+    // Reciprocal of Sum
+    typename CONFIG_T::inv_inp_t exp_sum_cast = exp_sum;
+    unsigned inv_idx = softmax_stable_idx_from_real_val<typename CONFIG_T::inv_inp_t, CONFIG_T>(exp_sum_cast);
+
+    [[intel::fpga_register]] typename CONFIG_T::inv_table_t inv_exp_sum = invert_table[inv_idx];

+    // Final Multiplication
 #pragma unroll
     for (unsigned i = 0; i < CONFIG_T::n_in; i++) {
         res[i] = exp_res[i] * inv_exp_sum;
```

jmitrevs marked this conversation as resolved.
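Putting the new pieces together, a rough floating-point model of this table-driven flow (a sketch, not the HLS code; `f_bits` is the assumed fractional width implied by the raw-bit-pattern indexing, and the 999.0 sentinel matches the invert-table writer later in this diff):

```python
import numpy as np

def softmax_stable_model(x, f_bits=10, exp_table_size=1 << 16, inv_table_size=1 << 16):
    x = np.asarray(x, dtype=np.float64)
    d = x.max() - x                                # d >= 0, so exp(-d) is in (0, 1]
    # Table index is the raw bit pattern of d with f_bits fractional bits
    idx = np.minimum((d * 2**f_bits).astype(int), exp_table_size - 1)
    exp_res = np.exp(-idx * 2.0**-f_bits)          # exp_table lookup
    s = exp_res.sum()                              # accumulate in accum_t
    inv_idx = min(int(s * 2**f_bits), inv_table_size - 1)
    inv = 999.0 if inv_idx == 0 else 1.0 / (inv_idx * 2.0**-f_bits)  # invert_table lookup
    return exp_res * inv

print(softmax_stable_model([0.5, 1.0, -0.25]))     # close to an exact softmax
```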
|
|
```diff
@@ -265,6 +262,45 @@ template <class data_T, class res_T, typename CONFIG_T> inline void softmax(cons
     }
 }

+// *************************************************
+// Multidimensional Softmax
+// *************************************************
+
+// Helper to remap the config for the core softmax function
+template <class CONFIG_T> struct softmax_multidim_slice_config : CONFIG_T {
+    static constexpr unsigned n_in = CONFIG_T::n_slice;
+};
+
+template <class data_T, class res_T, typename CONFIG_T> inline void softmax_multidim(const data_T &data, res_T &res) {
+    using buffer_data_t = std::array<typename data_T::value_type, CONFIG_T::n_slice>;
+    using buffer_res_t = std::array<typename res_T::value_type, CONFIG_T::n_slice>;
+    using slice_config = softmax_multidim_slice_config<CONFIG_T>;
+
+#pragma unroll
+    for (unsigned i = 0; i < CONFIG_T::n_outer; i++) {
+#pragma unroll
+        for (unsigned k = 0; k < CONFIG_T::n_inner; k++) {
+
+            [[intel::fpga_register]] buffer_data_t buffer_in;
+            [[intel::fpga_register]] buffer_res_t buffer_out;
+
+            // Gather Phase
+#pragma unroll
+            for (unsigned j = 0; j < CONFIG_T::n_slice; j++) {
+                unsigned idx = (i * CONFIG_T::n_slice * CONFIG_T::n_inner) + (j * CONFIG_T::n_inner) + k;
+                buffer_in[j] = data[idx];
+            }
+
+            nnet::softmax<buffer_data_t, buffer_res_t, slice_config>(buffer_in, buffer_out);
+
+#pragma unroll
+            for (unsigned j = 0; j < CONFIG_T::n_slice; j++) {
+                unsigned idx = (i * CONFIG_T::n_slice * CONFIG_T::n_inner) + (j * CONFIG_T::n_inner) + k;
+                res[idx] = buffer_out[j];
+            }
+        }
+    }
+}
+
 // *************************************************
 // TanH Activation
 // *************************************************
```
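The gather/scatter indexing corresponds to viewing the flat tensor as shape (n_outer, n_slice, n_inner) with softmax taken over the middle axis. A small NumPy check of that mapping (shapes chosen arbitrarily for illustration):

```python
import numpy as np

n_outer, n_slice, n_inner = 2, 4, 3
data = np.random.rand(n_outer * n_slice * n_inner)

# Reference: softmax over the n_slice axis of the reshaped tensor
ref = data.reshape(n_outer, n_slice, n_inner)
e = np.exp(ref - ref.max(axis=1, keepdims=True))
ref = (e / e.sum(axis=1, keepdims=True)).reshape(-1)

# Same computation via the kernel's flat index formula
res = np.empty_like(data)
for i in range(n_outer):
    for k in range(n_inner):
        sl = [i * n_slice * n_inner + j * n_inner + k for j in range(n_slice)]  # gather
        v = np.exp(data[sl] - data[sl].max())
        res[sl] = v / v.sum()                                                   # scatter

assert np.allclose(res, ref)
```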
|
|
```diff
@@ -549,16 +549,16 @@ def write_nnet_utils(self, model):
         dstpath = f'{model.config.get_output_dir()}/src/firmware/{dst}'
         copyfile(srcpath, dstpath)

-    def __get_table_size(self, model, activation):
+    def __get_table_size(self, model, activation, table_name='table_size'):
         for layer in model.get_layers():
             if (
                 layer.get_attr('activation') == activation or layer.get_attr('recurrent_activation') == activation
-            ) and layer.get_attr('table_size') is not None:
-                return int(layer.get_attr('table_size'))
+            ) and layer.get_attr(table_name) is not None:
+                return int(layer.get_attr(table_name))
         return 1024

-    def __get_table_header(self, table_name, table_size):
-        table_header = f'static const typename CONFIG_T::table_t {table_name}[{table_size}] = {{'
+    def __get_table_header(self, table_name, table_size, table_type='table_t'):
+        table_header = f'static const typename CONFIG_T::{table_type} {table_name}[{table_size}] = {{'
         return table_header

     def __write_elu_table(self, model, path):
```
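For illustration, the new keyword arguments let the two softmax tables carry distinct sizes and element types (a standalone sketch of the helper above; the argument values are made up):

```python
# Standalone copy of the header helper, called with hypothetical arguments.
def get_table_header(table_name, table_size, table_type='table_t'):
    return f'static const typename CONFIG_T::{table_type} {table_name}[{table_size}] = {{'

print(get_table_header('exp_table', 1024, table_type='exp_table_t'))
# static const typename CONFIG_T::exp_table_t exp_table[1024] = {
print(get_table_header('invert_table', 2048, table_type='inv_table_t'))
# static const typename CONFIG_T::inv_table_t invert_table[2048] = {
```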
|
|
```diff
@@ -687,46 +687,58 @@ def __write_selu_table(self, model, path):
         h_file.write('};\n')
         h_file.close()

+    def __get_table_precision(self, model, activation, table_name='table_precision'):
+        for layer in model.get_layers():
+            if layer.get_attr('activation') == activation and layer.get_attr(table_name) is not None:
+                precision = layer.get_attr(table_name)
+                return precision.precision
+
+        return None  # fp_bits, fp_integer, fp_signed
+
     def __write_exp_table(self, model, path):
         table_name = 'exp_table'
-        table_size = self.__get_table_size(model, 'softmax')
+        table_size = self.__get_table_size(model, 'softmax', table_name='exp_table_size')

         h_file = open(f'{path}/{table_name}.tb', 'w')
-        h_file.write(self.__get_table_header(table_name, table_size))
+        h_file.write(self.__get_table_header(table_name, table_size, table_type='exp_table_t'))

-        # Default fixed point precision
-        # 6 bits for integer part, 10 bits for decimal - total, 16
-        fp_bits = 16
-        fp_integer = 6
-        fp_signed = True
+        precision = self.__get_table_precision(model, 'softmax', table_name='inp_norm_t')

-        for layer in model.get_layers():
-            if layer.name == 'softmax':
-                ac_type = layer.get_input_variable().type
-                if ac_type is not None:
-                    try:
-                        fp_bits = ac_type.precision.integer + ac_type.precision.fractional
-                        fp_integer = ac_type.precision.integer
-                        fp_signed = ac_type.precision.signed
-                    except Exception:
-                        # FixedPrecisionType wasn't correctly stored in layer attributes, use default values
-                        pass
-        if fp_signed is False:
-            raise Exception('Softmax types need to be signed')
+        if precision is None:
+            fp_bits = 16
+            fp_integer = 6
+            fp_signed = True
+
+            # Exp table should use the same precision as exp_table, as seen in Vivado code
+            # init_exp_table<data_T, CONFIG_T>(exp_table);
+            for layer in model.get_layers():
+                if layer.name == 'softmax':
+                    ac_type = layer.get_input_variable().type
+                    if ac_type is not None:
+                        try:
+                            fp_bits = ac_type.precision.integer + ac_type.precision.fractional
+                            fp_integer = ac_type.precision.integer
+                            fp_signed = ac_type.precision.signed
+                        except Exception:
+                            # FixedPrecisionType wasn't correctly stored in layer attributes, use default values
+                            pass
+            if fp_signed is False:
+                raise Exception('Softmax types need to be signed')
+        else:
+            fp_bits = precision.width
+            fp_integer = precision.integer
+            fp_signed = precision.signed

         f_bits = fp_bits - fp_integer
         sep = ''
-        N = ceil_log2(table_size)
         for i in range(table_size):
-            f = FixedPointEmulator(fp_bits, fp_integer, signed=fp_signed)
-            b = uint_to_binary(i, N)
-            if i == 0:
-                b.insert(0, 0)
-            else:
-                b.insert(0, 1)
-            f.set_msb_bits(b)
-            real_val = f.exp_float()
+            # Index represents the raw bit pattern of the input
+            real_val_in = i * (2.0 ** (-f_bits))
+
+            # Calculate exp(-x) for the stable implementation
+            real_val = np.exp(-real_val_in)

             h_file.write(sep + str(real_val))
             sep = ', '
```
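The net effect of the new loop: entry i of exp_table stores exp(-x) for the input whose raw bit pattern is i. A standalone sketch with the default 16/6 precision (the real table size comes from the `exp_table_size` attribute):

```python
import numpy as np

fp_bits, fp_integer = 16, 6   # defaults from the code above
f_bits = fp_bits - fp_integer
table_size = 1024             # placeholder; set by 'exp_table_size' in practice

exp_table = [float(np.exp(-(i * 2.0**-f_bits))) for i in range(table_size)]
print(exp_table[0])   # 1.0 -> the maximum element (d = 0)
print(exp_table[-1])  # smallest stored value, for d just under table_size * 2**-f_bits
```

Note that with these placeholder numbers the table only covers d in [0, 1); presumably `exp_table_size` and `inp_norm_t` are generated together so the raw-bit-pattern index never overflows, which is the guarantee the reviewer asks about in the first hunk.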
|
|
```diff
@@ -735,41 +747,50 @@ def __write_exp_table(self, model, path):

     def __write_invert_table(self, model, path):
         table_name = 'invert_table'
-        table_size = self.__get_table_size(model, 'softmax')
+        table_size = self.__get_table_size(model, 'softmax', table_name='inv_table_size')

         h_file = open(f'{path}/{table_name}.tb', 'w')
-        h_file.write(self.__get_table_header(table_name, table_size))
+        h_file.write(self.__get_table_header(table_name, table_size, table_type='inv_table_t'))

-        # Default fixed point precision, in case values from layer attributes cannot be extracted
-        # 8 bits for integer part, 10 bits for decimal - total, 18
-        fp_bits = 18
-        fp_integer = 8
-        fp_signed = True
+        precision = self.__get_table_precision(model, 'softmax', table_name='inv_inp_t')

-        # Invert table should use the same precision as exp_table, as seen in Vivado code
-        # init_invert_table<typename CONFIG_T::exp_table_t, CONFIG_T>(invert_table);
-        for layer in model.get_layers():
-            if layer.name == 'softmax':
-                ac_type = layer.get_attr('exp_table_t')
-                if ac_type is not None:
-                    try:
-                        fp_bits = ac_type.precision.integer + ac_type.precision.fractional
-                        fp_integer = ac_type.precision.integer
-                        fp_signed = ac_type.precision.signed
-                    except Exception:
-                        # FixedPrecisionType wasn't correctly stored in layer attributes, use default values
-                        pass
-        if fp_signed is False:
-            raise Exception('Softmax types need to be signed')
+        if precision is None:
+            fp_bits = 18
+            fp_integer = 8
+            fp_signed = True
+
+            for layer in model.get_layers():
+                if layer.name == 'softmax':
+                    ac_type = layer.get_attr('exp_table_t')
+                    if ac_type is not None:
+                        try:
+                            fp_bits = ac_type.precision.integer + ac_type.precision.fractional
+                            fp_integer = ac_type.precision.integer
+                            fp_signed = ac_type.precision.signed
+                        except Exception:
+                            # FixedPrecisionType wasn't correctly stored in layer attributes, use default values
+                            pass
+            if fp_signed is False:
+                raise Exception('Softmax types need to be signed')
+        else:
+            fp_bits = precision.width
+            fp_integer = precision.integer
+            fp_signed = precision.signed

         f_bits = fp_bits - fp_integer
         sep = ''
-        N = ceil_log2(table_size)
         for i in range(table_size):
-            f = FixedPointEmulator(fp_bits, fp_integer, signed=fp_signed)
-            b = uint_to_binary(i, N)
-            b.insert(0, 0)
-            f.set_msb_bits(b)
-            real_val = f.inv_float()
+            # Index represents the raw bit pattern of the input
+            real_val_in = i * (2.0 ** (-f_bits))
+
+            if real_val_in == 0:
+                real_val = 999.0
+            else:
+                real_val = 1.0 / real_val_in

             h_file.write(sep + str(real_val))
             sep = ', '
```
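Similarly, entry i of invert_table stores the reciprocal of the value whose raw bit pattern is i, with a sentinel at zero (a standalone sketch; the sizes are placeholders):

```python
fp_bits, fp_integer = 18, 8   # defaults from the code above
f_bits = fp_bits - fp_integer
table_size = 4096             # placeholder; set by 'inv_table_size' in practice

invert_table = [999.0 if i == 0 else 1.0 / (i * 2.0**-f_bits) for i in range(table_size)]
print(invert_table[1 << f_bits])  # 1.0 -> reciprocal of an exp-sum of exactly 1.0
```

An exp-sum of zero cannot occur, since the maximum element always contributes exp(0) = 1 to the sum, so the 999.0 sentinel should never be read in practice.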