#ifndef __GEMM_INT16x16_TRANSPOSE_WRAPPER_CC__
#define __GEMM_INT16x16_TRANSPOSE_WRAPPER_CC__
#include "activated_mmult_qdq_int16x16_transpose_impl.hpp"

/// @brief Unpacks the kernel argument bundle and dispatches to
///        activated_mmult_qdq_int16x16_transpose<2, 0, float>.
///
/// The parameter block reached through @p args.params_data is interpreted as an
/// ActivatedMMultKernelParamsTranspose. Its `mode` field decides which of the
/// two s2mm channel ports is passed as the first activation and which as the
/// second; the output always goes to the mm2s channel-0 port.
///
/// @param args Kernel arguments; the fields read here are `params_data`,
///             `s2mm_ch0_port`, `s2mm_ch1_port` and `mm2s_ch0_port`.
void run_gemm_int16x16_transpose(KernelArgs& args)
{
    ActivatedMMultKernelParamsTranspose* params =
        static_cast<ActivatedMMultKernelParamsTranspose*>(args.params_data);

    // mode == 0 keeps the natural channel order (ch0 -> first, ch1 -> second);
    // any other mode swaps the two input ports.
    const bool natural_order = (params->mode == 0);
    BufferPort* first_act = args.s2mm_ch0_port;
    BufferPort* second_act = args.s2mm_ch1_port;
    if (!natural_order)
    {
        first_act = args.s2mm_ch1_port;
        second_act = args.s2mm_ch0_port;
    }
    BufferPort* out_port = args.mm2s_ch0_port;

    // Scratch / staging buffers described by the parameter block, viewed as int8.
    int8* tdm = reinterpret_cast<int8*>(params->tdm_buf);
    int8* wght_transposed = reinterpret_cast<int8*>(params->wght_transpose_sb);
    int8* cfqdq = reinterpret_cast<int8*>(params->cfqdq_buf);

    // Runtime QDQ parameters: the scalar coefficients are handed over as a
    // pointer to the `c0` field, the vector coefficient by value.
    GemmQdqint16x16_RT_Params* qdq =
        reinterpret_cast<GemmQdqint16x16_RT_Params*>(params->qdq_buf);
    float* scalar_coeff_ptr = &(qdq->c0);
    int vector_coeff_val = qdq->vector_coeff;

    // The logical coefficient port is built from the cfqdq buffer address,
    // supplied for both constructor arguments.
    void* coeff_base = reinterpret_cast<void*>(params->cfqdq_buf);
    logical_BufferPort coeff_port(coeff_base, coeff_base);

    activated_mmult_qdq_int16x16_transpose<2, 0, float>(
        *first_act, *second_act, coeff_port, *out_port,
        tdm, wght_transposed, cfqdq,
        scalar_coeff_ptr, vector_coeff_val,
        *params);
}

#endif //__GEMM_INT16x16_TRANSPOSE_WRAPPER_CC__