
/*
               Copyright (C) 2019 - 2022 Xilinx, Inc. All rights reserved.
               Copyright (C) 2022 - 2025 Advanced Micro Devices, Inc. All rights reserved.
               This file contains confidential and proprietary information
               of Xilinx, Inc. and is protected under U.S. and
               international copyright and other intellectual property laws */

#ifndef __SUPER_KERNEL_TYPES_H__
#define __SUPER_KERNEL_TYPES_H__
#include <stdint.h>
#include "conv/conv_xint8/kernel_setup/mllib_config.h"

/// Activation selector stored in a single byte.
///
/// The numeric values are explicit and must stay as they are; note that 3 is
/// not assigned, and that `None` and `NoRelu` intentionally share the value 0.
enum class ReluType : uint8_t {
    NoRelu      = 0,      ///< A Relu operator is present, but it is not applied.
    None        = NoRelu, ///< The code does not provide any Relu operator at all.
    Relu        = 1,
    Relu6       = 2,
    Leaky_Prelu = 4,
    Hsigmoid    = 5,
    Hswish      = 6,
};

// Parameter block for the super kernel: a flat collection of fixed-width
// scalar fields (shifts, loop counts/increments, offsets, padding and tiling
// descriptors).
//
// NOTE(review): the field order, the fixed-width types and the conditional
// members at the end all determine the memory layout of this struct. It looks
// like a layout shared with other code (producer/consumer of the parameters) --
// confirm before reordering, resizing, or inserting fields.
struct super_kernel_params_t {
    // ReluType is a 1-byte enum; alignas(int16_t) raises this member's
    // alignment requirement to that of int16_t.
    alignas(int16_t) ReluType act_type_1;
    int16_t tile_ocg;
    int16_t str_w;
    // Shift amounts and related scalars.
    // NOTE(review): exact meaning/units of each shift are not visible here --
    // confirm against the kernel that consumes them.
    int16_t shift_bias_1;
    int16_t shift_out_1;
    int16_t ifm_sign;
    int16_t shift_psum_in;
    int16_t shift_psum_out;
    int16_t shift_out16;
    int16_t shift_leaky;
    int16_t leaky_alpha;
    int16_t upshift_elw_ifm1 ;
    int16_t upshift_elw_ifm2 ;
    int16_t downshift_eltw_res ;

    int16_t step_align;

    // Parallelism and loop configuration.
    int16_t shfl;
    int16_t shft;

    // num*/inc* pairs: presumably iteration counts and address increments for
    // the A, B and CS/S streams -- TODO confirm naming against the kernel code.
    int16_t incAI1;
    int16_t numAL1;
    int16_t incAL1;
    int16_t numAL2;
    int16_t incAL2;
    int16_t incAL3;
    int16_t numAO1;
    int16_t incAO1;
    int16_t numAO2;
    int16_t incAO2;
    int16_t incAO3;
    int16_t numB;
    int16_t incB1;
    int16_t incB2;
    int16_t incS0;
    int16_t numCS1;
    int16_t incCS1;
    int16_t numCS2;
    int16_t incCS2;
    int16_t incCS3;
    int16_t inner_loop;
    int16_t outer_loop;

    int32_t psum0;
    int32_t psum1;

    // Pointer (stored as a 32-bit integer) to the buffer that receives the
    // output of the convolution. This is needed for layers where the
    // convolution is fused with other operators. The field is ignored when its
    // value is 0.
    int32_t conv_out;

    uint8_t num_ifm_depth_iter;
    // The member has the same name as its type, so the elaborated
    // `enum conv_type` spelling is required here. The enum itself is declared
    // outside this file.
    enum conv_type conv_type;
    uint8_t stride_bits;

    int32_t hdr_len;
    int32_t wts_offset;
    uint32_t wts_sv_len;
    uint16_t ofm_len;
    uint16_t num_iter;
    uint16_t wrapper_iter;
    uint8_t out_mode;
    // layer_mode is declared outside this file.
    layer_mode op_mode;
    uint8_t stride2_exec_type;

    uint8_t global_num_cols;

    // Experimental fields used for padding.
    int32_t ifm_len;
    uint8_t spatial_split_w_ld; // Describes the split along W; used to determine padding.
    uint8_t spatial_split_h_ld; // Describes the split along H; used to determine padding.
    uint16_t dwc_stride_w;
    uint16_t dwc_stride_h;
    uint16_t spacial_step_h;
    uint16_t pixel_h;
    uint16_t pixel_w;
    uint8_t pad_val;
    uint8_t pad_top;
    uint8_t pad_bottom;
    uint8_t pad_left;
    uint8_t pad_right;
    uint8_t sv_x;
    uint8_t sv_y;
    uint8_t nifms;
    // Fields used to describe movement along the image.
    uint16_t ddr2mt_c;
    uint16_t mt2aie_h;
    uint16_t mt2aie_w;

    // The two members below exist only when the corresponding macro evaluates
    // to non-zero (an undefined macro evaluates to 0 in #if), so the size of
    // this struct depends on the build configuration.
#if IFM_REUSE
    // Keeps track of the number of iterations reusing the same IFMs.
    uint8_t ifm_reuse_iter;
#endif

#if WTS_REUSE
    // Keeps track of the number of iterations reusing the same WTS.
    uint8_t wts_reuse_iter;
#endif

};

// Parameter block for the leaky-ReLU kernel: two 8-bit shift amounts followed
// by a 16-bit alpha value.
// NOTE(review): presumably alpha is a fixed-point slope scaled by shift_alpha,
// with shift_out applied to the result -- confirm against the kernel.
struct leakyrelu_kernel_params_t {
    int8_t shift_out, shift_alpha; // declaration order is the member order
    int16_t alpha;
};

#endif
