#ifndef __GEMM_INT16x4_WRAPPER_CC__
#define __GEMM_INT16x4_WRAPPER_CC__
#include "activated_mmult_qdq_int16x4_impl.hpp"

// Unpacks a KernelArgs bundle and forwards it to
// activated_mmult_qdq_int16x4<1, 2, float>.
//
// args.params_data is reinterpreted as MMultQdQInt16x4Params. Its `mode`
// field decides which s2mm channel carries the input feature map and which
// carries the weights; the output always goes to args.mm2s_ch0_port.
// A logical_BufferPort for the coefficients is built from the weight port's
// current/previous addresses, each advanced via byte_incr by `wgt_size`
// (presumably the coefficients are stored right after the weights in the
// same buffer -- confirm against the buffer layout).
void run_gemm_int16x4(KernelArgs& args)
{
    MMultQdQInt16x4Params& params = *static_cast<MMultQdQInt16x4Params*>(args.params_data);

    // mode == 0: ch0 -> ifm, ch1 -> wgt; any other mode swaps the two.
    BufferPort* ifm_port;
    BufferPort* wgt_port;
    if (params.mode == 0) {
        ifm_port = args.s2mm_ch0_port;
        wgt_port = args.s2mm_ch1_port;
    } else {
        ifm_port = args.s2mm_ch1_port;
        wgt_port = args.s2mm_ch0_port;
    }
    BufferPort* ofm_port = args.mm2s_ch0_port;

    // Coefficient view: weight-port addresses offset by wgt_size.
    void* coeff_curr = reinterpret_cast<void*>(byte_incr(wgt_port->get_curr_addr(), params.wgt_size));
    void* coeff_prev = reinterpret_cast<void*>(byte_incr(wgt_port->get_prev_addr(), params.wgt_size));
    logical_BufferPort coeff_port(coeff_curr, coeff_prev);

    // Scratch buffers named in the params, passed through conv_to_local_ptr
    // and viewed as int8.
    int8* spill     = reinterpret_cast<int8*>(conv_to_local_ptr(params.spill_buff));
    int8* ifm_tmp   = reinterpret_cast<int8*>(conv_to_local_ptr(params.ifm_tmp_buf));
    int8* coeff_tmp = reinterpret_cast<int8*>(conv_to_local_ptr(params.coeff_tmp_buf));

    activated_mmult_qdq_int16x4<1, 2, float>(
        *ifm_port, *wgt_port, coeff_port, *ofm_port,
        spill, ifm_tmp, coeff_tmp,
        params);
}
#endif // __GEMM_INT16x4_WRAPPER_CC__