-- ==============================================================
-- Generated by Vitis HLS v2025.1
-- Copyright 1986-2022 Xilinx, Inc. All Rights Reserved.
-- Copyright 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.
-- ==============================================================

library IEEE;
use IEEE.STD_LOGIC_1164.all;
use IEEE.NUMERIC_STD.all;

-- Top-level entity of the "gmem" m_axi adapter.
-- The port list has three groups: clock / reset / clock-enable, the external
-- AXI master channels (AW, W, B, AR, R), and the internal channel-0 ports
-- (I_CH0_*). Channel 0 is marked write-only below; its read-side ports are
-- still part of the interface.
entity s2mm_gmem_m_axi is
    generic (
        -- Widths of the external AXI ID, address, data and USER signals.
        C_M_AXI_ID_WIDTH          : INTEGER := 1;
        C_M_AXI_ADDR_WIDTH        : INTEGER := 32;
        C_M_AXI_DATA_WIDTH        : INTEGER := 32;
        C_M_AXI_AWUSER_WIDTH      : INTEGER := 1;
        C_M_AXI_ARUSER_WIDTH      : INTEGER := 1;
        C_M_AXI_WUSER_WIDTH       : INTEGER := 1;
        C_M_AXI_RUSER_WIDTH       : INTEGER := 1;
        C_M_AXI_BUSER_WIDTH       : INTEGER := 1;
        -- NOTE(review): presumably the base address offset and the constant
        -- USER / PROT / CACHE values used on the bus -- confirm in the sub-units.
        C_TARGET_ADDR             : INTEGER := 16#00000000#;
        C_USER_VALUE              : INTEGER := 0;
        C_PROT_VALUE              : INTEGER := 2#000#;
        C_CACHE_VALUE             : INTEGER := 2#0011#;
        -- A non-zero value selects the store unit's "conservative" generate
        -- branch; zero selects the "aggressive" one.
        CONSERVATIVE              : INTEGER := 0;
        -- Burst-length and outstanding-transaction limits.
        -- NOTE(review): units are presumably beats / transactions -- confirm.
        MAX_READ_BURST_LENGTH     : INTEGER := 16;
        MAX_WRITE_BURST_LENGTH    : INTEGER := 16;
        NUM_READ_OUTSTANDING      : INTEGER := 2;
        NUM_WRITE_OUTSTANDING     : INTEGER := 2;
        -- Depth of the store unit's write request FIFO.
        USER_MAXREQS              : INTEGER := 16;
        -- Width of the I_CH0_AWLEN / I_CH0_ARLEN length ports.
        USER_LEN_WIDTH            : INTEGER := 32;
        -- channel configurations
        -- Data and address widths of the I_CH0_* ports.
        CH0_USER_DW               : INTEGER := 32;
        CH0_USER_AW               : INTEGER := 32;
        CH0_NUM_READ_OUTSTANDING  : INTEGER := 2;
        CH0_NUM_WRITE_OUTSTANDING : INTEGER := 2;
        -- Width of the I_CH0_RFIFONUM port.
        CH0_USER_RFIFONUM_WIDTH   : INTEGER := 6;
        -- Forwarded to the store unit as its BUFFER_IMPL generic.
        MAXI_BUFFER_IMPL          : STRING  := "block");
    port (
        -- system signal
        ACLK            : in  STD_LOGIC;
        ARESET          : in  STD_LOGIC;
        ACLK_EN         : in  STD_LOGIC;
        -- write address channel
        AWID            : out STD_LOGIC_VECTOR(C_M_AXI_ID_WIDTH-1 downto 0);
        AWADDR          : out STD_LOGIC_VECTOR(C_M_AXI_ADDR_WIDTH-1 downto 0);
        AWLEN           : out STD_LOGIC_VECTOR(7 downto 0);
        AWSIZE          : out STD_LOGIC_VECTOR(2 downto 0);
        AWBURST         : out STD_LOGIC_VECTOR(1 downto 0);
        AWLOCK          : out STD_LOGIC_VECTOR(1 downto 0);
        AWCACHE         : out STD_LOGIC_VECTOR(3 downto 0);
        AWPROT          : out STD_LOGIC_VECTOR(2 downto 0);
        AWQOS           : out STD_LOGIC_VECTOR(3 downto 0);
        AWREGION        : out STD_LOGIC_VECTOR(3 downto 0);
        AWUSER          : out STD_LOGIC_VECTOR(C_M_AXI_AWUSER_WIDTH-1 downto 0);
        AWVALID         : out STD_LOGIC;
        AWREADY         : in  STD_LOGIC;
        -- write data channel
        WID             : out STD_LOGIC_VECTOR(C_M_AXI_ID_WIDTH-1 downto 0);
        WDATA           : out STD_LOGIC_VECTOR(C_M_AXI_DATA_WIDTH-1 downto 0);
        -- one strobe bit per data byte
        WSTRB           : out STD_LOGIC_VECTOR(C_M_AXI_DATA_WIDTH/8-1 downto 0);
        WLAST           : out STD_LOGIC;
        WUSER           : out STD_LOGIC_VECTOR(C_M_AXI_WUSER_WIDTH-1 downto 0);
        WVALID          : out STD_LOGIC;
        WREADY          : in  STD_LOGIC;
        -- write response channel
        BID             : in  STD_LOGIC_VECTOR(C_M_AXI_ID_WIDTH-1 downto 0);
        BRESP           : in  STD_LOGIC_VECTOR(1 downto 0);
        BUSER           : in  STD_LOGIC_VECTOR(C_M_AXI_BUSER_WIDTH-1 downto 0);
        BVALID          : in  STD_LOGIC;
        BREADY          : out STD_LOGIC;
        -- read address channel
        ARID            : out STD_LOGIC_VECTOR(C_M_AXI_ID_WIDTH-1 downto 0);
        ARADDR          : out STD_LOGIC_VECTOR(C_M_AXI_ADDR_WIDTH-1 downto 0);
        ARLEN           : out STD_LOGIC_VECTOR(7 downto 0);
        ARSIZE          : out STD_LOGIC_VECTOR(2 downto 0);
        ARBURST         : out STD_LOGIC_VECTOR(1 downto 0);
        ARLOCK          : out STD_LOGIC_VECTOR(1 downto 0);
        ARCACHE         : out STD_LOGIC_VECTOR(3 downto 0);
        ARPROT          : out STD_LOGIC_VECTOR(2 downto 0);
        ARQOS           : out STD_LOGIC_VECTOR(3 downto 0);
        ARREGION        : out STD_LOGIC_VECTOR(3 downto 0);
        ARUSER          : out STD_LOGIC_VECTOR(C_M_AXI_ARUSER_WIDTH-1 downto 0);
        ARVALID         : out STD_LOGIC;
        ARREADY         : in  STD_LOGIC;
        -- read data channel
        RID             : in  STD_LOGIC_VECTOR(C_M_AXI_ID_WIDTH-1 downto 0);
        RDATA           : in  STD_LOGIC_VECTOR(C_M_AXI_DATA_WIDTH-1 downto 0);
        RRESP           : in  STD_LOGIC_VECTOR(1 downto 0);
        RLAST           : in  STD_LOGIC;
        RUSER           : in  STD_LOGIC_VECTOR(C_M_AXI_RUSER_WIDTH-1 downto 0);
        RVALID          : in  STD_LOGIC;
        RREADY          : out STD_LOGIC;
        -- multiple internal channels
        -- channel 0 --  WRITE-ONLY
        -- write request (address + length), write data and write response
        I_CH0_AWADDR    : in  STD_LOGIC_VECTOR(CH0_USER_AW-1 downto 0);
        I_CH0_AWLEN     : in  STD_LOGIC_VECTOR(USER_LEN_WIDTH-1 downto 0);
        I_CH0_AWVALID   : in  STD_LOGIC;
        I_CH0_AWREADY   : out STD_LOGIC;
        I_CH0_WDATA     : in  STD_LOGIC_VECTOR(CH0_USER_DW-1 downto 0);
        I_CH0_WSTRB     : in  STD_LOGIC_VECTOR(CH0_USER_DW/8-1 downto 0);
        I_CH0_WVALID    : in  STD_LOGIC;
        I_CH0_WREADY    : out STD_LOGIC;
        I_CH0_BVALID    : out STD_LOGIC;
        I_CH0_BREADY    : in  STD_LOGIC;
        -- read-side ports of channel 0 (present although the channel is
        -- write-only)
        I_CH0_ARADDR    : in  STD_LOGIC_VECTOR(CH0_USER_AW-1 downto 0);
        I_CH0_ARLEN     : in  STD_LOGIC_VECTOR(USER_LEN_WIDTH-1 downto 0);
        I_CH0_ARVALID   : in  STD_LOGIC;
        I_CH0_ARREADY   : out STD_LOGIC;
        I_CH0_RDATA     : out STD_LOGIC_VECTOR(CH0_USER_DW-1 downto 0);
        I_CH0_RFIFONUM  : out STD_LOGIC_VECTOR(CH0_USER_RFIFONUM_WIDTH-1 downto 0);
        I_CH0_RVALID    : out STD_LOGIC;
        I_CH0_RREADY    : in  STD_LOGIC
        );
end entity s2mm_gmem_m_axi;

architecture behave of s2mm_gmem_m_axi is
    --========================Constant========================
    constant NUM_READ_PORTS  : INTEGER := 0;
    constant NUM_WRITE_PORTS : INTEGER := 1;
    --========================Component======================== 
    -- Component declaration for the per-channel store unit (instantiated
    -- below as store_unit_0). Its generics and ports mirror the entity
    -- s2mm_gmem_m_axi_store declared later in this file; keep both in sync.
    component s2mm_gmem_m_axi_store is
        generic (
            CONSERVATIVE           : INTEGER := 0;
            C_TARGET_ADDR          : INTEGER := 16#00000000#;
            NUM_WRITE_OUTSTANDING  : INTEGER := 2;
            MAX_WRITE_BURST_LENGTH : INTEGER := 16;
            BUS_ADDR_WIDTH         : INTEGER := 32;
            BUS_DATA_WIDTH         : INTEGER := 32;
            USER_DW                : INTEGER := 16;
            USER_AW                : INTEGER := 32;
            USER_LEN_WIDTH         : INTEGER := 32;
            USER_MAXREQS           : INTEGER := 16;
            BUFFER_IMPL            : STRING  := "auto");
        port (
            ACLK                   : in  STD_LOGIC;
            ARESET                 : in  STD_LOGIC;
            ACLK_EN                : in  STD_LOGIC;
            -- write request toward the bus write unit
            out_AXI_AWADDR         : out UNSIGNED(BUS_ADDR_WIDTH-1 downto 0);
            out_AXI_AWLEN          : out UNSIGNED(USER_LEN_WIDTH-1 downto 0);
            out_AXI_AWVALID        : out STD_LOGIC;
            in_AXI_AWREADY         : in  STD_LOGIC;
            -- burst length handshake with the bus write unit
            in_BURST_AWLEN         : in  UNSIGNED(7 downto 0);
            in_BURST_AWVALID       : in  STD_LOGIC;
            out_BURST_WLEN         : out UNSIGNED(7 downto 0);
            out_BURST_WVALID       : out STD_LOGIC;
            in_BURST_WREADY        : in  STD_LOGIC;
            -- write data toward the bus write unit
            out_AXI_WDATA          : out UNSIGNED(BUS_DATA_WIDTH-1 downto 0);
            out_AXI_WSTRB          : out UNSIGNED(BUS_DATA_WIDTH/8-1 downto 0);
            out_AXI_WVALID         : out STD_LOGIC;
            in_AXI_WREADY          : in  STD_LOGIC;
            -- write response from the bus write unit
            in_AXI_BVALID          : in  STD_LOGIC;
            out_AXI_BREADY         : out STD_LOGIC; 
            -- HLS (kernel) side write request / data / response ports
            in_HLS_AWADDR          : in  UNSIGNED(USER_AW-1 downto 0);
            in_HLS_AWLEN           : in  UNSIGNED(USER_LEN_WIDTH-1 downto 0);
            in_HLS_AWVALID         : in  STD_LOGIC;
            out_HLS_AWREADY        : out STD_LOGIC;
            in_HLS_WDATA           : in  UNSIGNED(USER_DW-1 downto 0);
            in_HLS_WSTRB           : in  UNSIGNED(USER_DW/8-1 downto 0);
            in_HLS_WVALID          : in  STD_LOGIC;
            out_HLS_WREADY         : out STD_LOGIC;
            out_HLS_BVALID         : out STD_LOGIC;
            in_HLS_BREADY          : in  STD_LOGIC);
    end component s2mm_gmem_m_axi_store;
    -- Component declaration for the bus write unit (instantiated below as
    -- bus_write). The matching entity is defined elsewhere; this declaration
    -- must agree with it.
    component s2mm_gmem_m_axi_write is
        generic (
            CONSERVATIVE           : INTEGER := 0;
            C_M_AXI_ID_WIDTH       : INTEGER := 1;
            C_M_AXI_AWUSER_WIDTH   : INTEGER := 1;
            C_M_AXI_WUSER_WIDTH    : INTEGER := 1;
            C_M_AXI_BUSER_WIDTH    : INTEGER := 1;
            C_USER_VALUE           : INTEGER := 0;
            C_PROT_VALUE           : INTEGER := 0;
            C_CACHE_VALUE          : INTEGER := 2#0011#;
            BUS_ADDR_WIDTH         : INTEGER := 32;
            BUS_DATA_WIDTH         : INTEGER := 32;
            USER_LEN_WIDTH         : INTEGER := 32;
            MAX_WRITE_BURST_LENGTH : INTEGER := 1;
            NUM_WRITE_OUTSTANDING  : INTEGER := 1;
            ID0_NUM_WRITE_OUTSTANDING : INTEGER := 2;
            NUM_WRITE_PORTS        : INTEGER := 1);
        port (
            ACLK                   : in  STD_LOGIC;
            ARESET                 : in  STD_LOGIC;
            ACLK_EN                : in  STD_LOGIC;
            -- external bus side: write address channel
            out_BUS_AWID           : out UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
            out_BUS_AWADDR         : out UNSIGNED(BUS_ADDR_WIDTH-1 downto 0);
            out_BUS_AWLEN          : out UNSIGNED(7 downto 0);
            out_BUS_AWSIZE         : out UNSIGNED(2 downto 0);
            out_BUS_AWBURST        : out UNSIGNED(1 downto 0);
            out_BUS_AWLOCK         : out UNSIGNED(1 downto 0);
            out_BUS_AWCACHE        : out UNSIGNED(3 downto 0);
            out_BUS_AWPROT         : out UNSIGNED(2 downto 0);
            out_BUS_AWQOS          : out UNSIGNED(3 downto 0);
            out_BUS_AWREGION       : out UNSIGNED(3 downto 0);
            out_BUS_AWUSER         : out UNSIGNED(C_M_AXI_AWUSER_WIDTH-1 downto 0);
            out_BUS_AWVALID        : out STD_LOGIC;
            in_BUS_AWREADY         : in  STD_LOGIC;
            -- external bus side: write data channel
            out_BUS_WID            : out UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
            out_BUS_WDATA          : out UNSIGNED(BUS_DATA_WIDTH-1 downto 0);
            out_BUS_WSTRB          : out UNSIGNED(BUS_DATA_WIDTH/8-1 downto 0);
            out_BUS_WLAST          : out STD_LOGIC;
            out_BUS_WUSER          : out UNSIGNED(C_M_AXI_WUSER_WIDTH-1 downto 0);
            out_BUS_WVALID         : out STD_LOGIC;
            in_BUS_WREADY          : in  STD_LOGIC;
            -- external bus side: write response channel
            in_BUS_BID             : in  UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
            in_BUS_BRESP           : in  UNSIGNED(1 downto 0);
            in_BUS_BUSER           : in  UNSIGNED(C_M_AXI_BUSER_WIDTH-1 downto 0);
            in_BUS_BVALID          : in  STD_LOGIC;
            out_BUS_BREADY         : out STD_LOGIC;
            -- internal side: request / data / response toward the store
            -- unit(s); the ready / valid vectors carry one bit per write port
            in_AXI_AWID            : in  UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
            in_AXI_AWADDR          : in  UNSIGNED(BUS_ADDR_WIDTH-1 downto 0);
            in_AXI_AWLEN           : in  UNSIGNED(USER_LEN_WIDTH-1 downto 0);
            in_AXI_AWVALID         : in  STD_LOGIC;
            out_AXI_AWREADY        : out UNSIGNED(NUM_WRITE_PORTS-1 downto 0);
            out_AXI_WID            : out UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
            in_AXI_WDATA           : in  UNSIGNED(BUS_DATA_WIDTH-1 downto 0);
            in_AXI_WSTRB           : in  UNSIGNED(BUS_DATA_WIDTH/8-1 downto 0);
            in_AXI_WVALID          : in  STD_LOGIC;
            out_AXI_WREADY         : out STD_LOGIC;
            out_AXI_BVALID         : out UNSIGNED(NUM_WRITE_PORTS-1 downto 0);
            in_AXI_BREADY          : in  UNSIGNED(NUM_WRITE_PORTS-1 downto 0);
            -- internal side: burst length handshake with the store unit(s)
            out_BURST_AWLEN        : out UNSIGNED(7 downto 0);
            out_BURST_AWVALID      : out UNSIGNED(NUM_WRITE_PORTS-1 downto 0);
            in_BURST_WID           : in  UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
            in_BURST_WLEN          : in  UNSIGNED(7 downto 0);
            in_BURST_WVALID        : in  STD_LOGIC;
            out_BURST_WREADY       : out UNSIGNED(NUM_WRITE_PORTS-1 downto 0));
    end component s2mm_gmem_m_axi_write;
    --========================Local Signals===================
    -- AW/W/B channel signals
    -- Per-channel array types: one element per write port
    -- (NUM_WRITE_PORTS = 1 here, so each array has the single index 0).
    type AWADDR_ARRAY is array (0 to NUM_WRITE_PORTS-1) of UNSIGNED(C_M_AXI_ADDR_WIDTH-1 downto 0);
    type AWLEN_ARRAY  is array (0 to NUM_WRITE_PORTS-1) of UNSIGNED(USER_LEN_WIDTH-1 downto 0);
    type WDATA_ARRAY  is array (0 to NUM_WRITE_PORTS-1) of UNSIGNED(C_M_AXI_DATA_WIDTH-1 downto 0);
    type WSTRB_ARRAY  is array (0 to NUM_WRITE_PORTS-1) of UNSIGNED(C_M_AXI_DATA_WIDTH/8-1 downto 0);
    type BURST_LEN_ARRAY is array (0 to NUM_WRITE_PORTS-1) of UNSIGNED(7 downto 0);

    -- local_CHN_*: per-channel signals on the store unit side
    -- (one array element / vector bit per write port).
    signal local_CHN_AWADDR        : AWADDR_ARRAY;
    signal local_CHN_AWLEN         : AWLEN_ARRAY;
    signal local_CHN_AWVALID       : UNSIGNED(NUM_WRITE_PORTS-1 downto 0);
    signal local_CHN_AWREADY       : UNSIGNED(NUM_WRITE_PORTS-1 downto 0);
    signal local_CHN_WDATA         : WDATA_ARRAY;
    signal local_CHN_WSTRB         : WSTRB_ARRAY;
    signal local_CHN_WVALID        : UNSIGNED(NUM_WRITE_PORTS-1 downto 0);
    signal local_CHN_WREADY        : UNSIGNED(NUM_WRITE_PORTS-1 downto 0);

    -- local_AXI_*: the selected request / data presented to the bus write
    -- unit, plus the ready / response signals coming back from it.
    signal local_AXI_AWID          : UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
    signal local_AXI_AWADDR        : UNSIGNED(C_M_AXI_ADDR_WIDTH-1 downto 0);
    signal local_AXI_AWLEN         : UNSIGNED(USER_LEN_WIDTH-1 downto 0);
    signal local_AXI_AWVALID       : STD_LOGIC;
    signal local_AXI_AWREADY       : UNSIGNED(NUM_WRITE_PORTS-1 downto 0);
    signal local_AXI_WID           : UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
    signal local_AXI_WDATA         : UNSIGNED(C_M_AXI_DATA_WIDTH-1 downto 0);
    signal local_AXI_WSTRB         : UNSIGNED(C_M_AXI_DATA_WIDTH/8-1 downto 0);
    signal local_AXI_WVALID        : STD_LOGIC;
    signal local_AXI_WREADY        : STD_LOGIC;
    signal local_AXI_BVALID        : UNSIGNED(NUM_WRITE_PORTS-1 downto 0);
    signal local_AXI_BREADY        : UNSIGNED(NUM_WRITE_PORTS-1 downto 0);

    -- local_BURST_* / local_CHN_BURST_*: burst length handshake between the
    -- bus write unit and the store unit(s).
    signal local_BURST_AWLEN       : UNSIGNED(7 downto 0);
    signal local_BURST_AWVALID     : UNSIGNED(NUM_WRITE_PORTS-1 downto 0);
    signal local_BURST_WID         : UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
    signal local_BURST_WLEN        : UNSIGNED(7 downto 0);
    signal local_BURST_WVALID      : STD_LOGIC;
    signal local_BURST_WREADY      : UNSIGNED(NUM_WRITE_PORTS-1 downto 0);
    signal local_CHN_BURST_WLEN    : BURST_LEN_ARRAY;
    signal local_CHN_BURST_WVALID  : UNSIGNED(NUM_WRITE_PORTS-1 downto 0);
    signal local_CHN_BURST_WREADY  : UNSIGNED(NUM_WRITE_PORTS-1 downto 0);

    -- AR/R channel signals 

    -- flush logic 
begin
    -- AXI Ports Initialization
    -- No read port exists in this adapter (NUM_READ_PORTS = 0), so every
    -- read address (AR) output is tied to zero and RREADY is held low.
    ARID     <= (others=>'0');
    ARADDR   <= (others=>'0');
    ARLEN    <= (others=>'0');
    ARSIZE   <= (others=>'0');
    ARBURST  <= (others=>'0');
    ARLOCK   <= (others=>'0');
    ARCACHE  <= (others=>'0');
    ARPROT   <= (others=>'0');
    ARQOS    <= (others=>'0');
    ARREGION <= (others=>'0');
    ARUSER   <= (others=>'0');
    ARVALID  <= '0';
    RREADY   <= '0';

    -- Kernel Ports Initialization
    -- Channel 0 is write-only: its read-side outputs are tied off, so read
    -- requests are never accepted and read data is never reported valid.
    I_CH0_ARREADY   <= '0';
    I_CH0_RDATA     <= (others=>'0');
    I_CH0_RVALID    <= '0';
    I_CH0_RFIFONUM  <= (others=>'0');

    -- Internal Ports Mapping
    -- With a single write port the request ID and burst ID are constant zero
    -- and channel 0's valid / ready signals connect straight through to the
    -- bus write unit.
    local_AXI_AWID        <= (others=>'0');
    local_AXI_AWVALID     <= local_CHN_AWVALID(0);
    local_AXI_WVALID      <= local_CHN_WVALID(0);
    local_CHN_AWREADY     <= local_AXI_AWREADY;
    local_CHN_WREADY(0)   <= local_AXI_WREADY;
    local_BURST_WID       <= (others=>'0');
    local_BURST_WVALID    <= local_CHN_BURST_WVALID(0);
    local_CHN_BURST_WREADY<= local_BURST_WREADY;
    -- ID-indexed selection of the per-channel values. AWID and BURST_WID are
    -- constant zero, so element 0 is always chosen for address, length and
    -- burst length.
    local_AXI_AWADDR      <= local_CHN_AWADDR(TO_INTEGER(local_AXI_AWID));
    local_AXI_AWLEN       <= local_CHN_AWLEN(TO_INTEGER(local_AXI_AWID));
    -- Data and strobe are selected by local_AXI_WID, which is driven by the
    -- bus write unit (out_AXI_WID).
    -- NOTE(review): the arrays only have index 0, so local_AXI_WID must
    -- evaluate to 0 -- confirm bus_write guarantees this when
    -- C_M_AXI_ID_WIDTH > 1.
    local_AXI_WDATA       <= local_CHN_WDATA(TO_INTEGER(local_AXI_WID));
    local_AXI_WSTRB       <= local_CHN_WSTRB(TO_INTEGER(local_AXI_WID));
    local_BURST_WLEN      <= local_CHN_BURST_WLEN(TO_INTEGER(local_BURST_WID));

    -- flush logic 
    --========================Instantiation========================
    -- ++++++++++++++++++++++ STORE UNITS ++++++++++++++++++++++ 
    -- store_unit for channel 0
    -- Store unit for channel 0. Connects the kernel-side I_CH0 write ports
    -- (AW / W / B) to the per-channel local_CHN_* signals at index 0 and to
    -- bit 0 of the burst / response vectors shared with the bus write unit.
    store_unit_0 : s2mm_gmem_m_axi_store
    generic map (
        CONSERVATIVE           => CONSERVATIVE,
        C_TARGET_ADDR          => C_TARGET_ADDR,
        -- channel-specific outstanding limit and user widths
        NUM_WRITE_OUTSTANDING  => CH0_NUM_WRITE_OUTSTANDING,
        MAX_WRITE_BURST_LENGTH => MAX_WRITE_BURST_LENGTH,
        BUS_ADDR_WIDTH         => C_M_AXI_ADDR_WIDTH,
        BUS_DATA_WIDTH         => C_M_AXI_DATA_WIDTH,
        USER_DW                => CH0_USER_DW,
        USER_AW                => CH0_USER_AW,
        USER_LEN_WIDTH         => USER_LEN_WIDTH,
        USER_MAXREQS           => USER_MAXREQS,
        BUFFER_IMPL            => MAXI_BUFFER_IMPL) 
    port map (
        ACLK                   => ACLK,
        ARESET                 => ARESET,
        ACLK_EN                => ACLK_EN,
        out_AXI_AWADDR         => local_CHN_AWADDR(0),
        out_AXI_AWLEN          => local_CHN_AWLEN(0),
        out_AXI_AWVALID        => local_CHN_AWVALID(0),
        in_AXI_AWREADY         => local_CHN_AWREADY(0),
        in_BURST_AWLEN         => local_BURST_AWLEN,
        in_BURST_AWVALID       => local_BURST_AWVALID(0),
        out_BURST_WLEN         => local_CHN_BURST_WLEN(0),
        out_BURST_WVALID       => local_CHN_BURST_WVALID(0),
        in_BURST_WREADY        => local_CHN_BURST_WREADY(0),
        out_AXI_WDATA          => local_CHN_WDATA(0),
        out_AXI_WSTRB          => local_CHN_WSTRB(0),
        out_AXI_WVALID         => local_CHN_WVALID(0),
        in_AXI_WREADY          => local_CHN_WREADY(0),
        in_AXI_BVALID          => local_AXI_BVALID(0),
        out_AXI_BREADY         => local_AXI_BREADY(0),
        -- The entity ports are STD_LOGIC_VECTOR while the store unit uses
        -- UNSIGNED, hence the type conversions on the vector inputs.
        in_HLS_AWADDR          => UNSIGNED(I_CH0_AWADDR),
        in_HLS_AWLEN           => UNSIGNED(I_CH0_AWLEN) ,
        in_HLS_AWVALID         => I_CH0_AWVALID         ,
        out_HLS_AWREADY        => I_CH0_AWREADY         ,
        in_HLS_WDATA           => UNSIGNED(I_CH0_WDATA) ,
        in_HLS_WSTRB           => UNSIGNED(I_CH0_WSTRB) ,
        in_HLS_WVALID          => I_CH0_WVALID          ,
        out_HLS_WREADY         => I_CH0_WREADY          ,
        out_HLS_BVALID         => I_CH0_BVALID          ,
        in_HLS_BREADY          => I_CH0_BREADY          );
    -- ++++++++++++++++++++++ LOAD UNITS ++++++++++++++++++++++



    -- ++++++++++++++++++++++ AXI BUS READ/WRITE ++++++++++++++++++++++
    -- s2mm_gmem_m_axi_write 
    -- Bus write unit. Drives the external AW / W / B ports of this entity and
    -- exchanges requests, data, responses and burst lengths with the store
    -- unit through the local_AXI_* / local_BURST_* signals.
    bus_write : s2mm_gmem_m_axi_write
    generic map (
        CONSERVATIVE                       => CONSERVATIVE,
        C_M_AXI_ID_WIDTH                   => C_M_AXI_ID_WIDTH,
        C_M_AXI_AWUSER_WIDTH               => C_M_AXI_AWUSER_WIDTH,
        C_M_AXI_WUSER_WIDTH                => C_M_AXI_WUSER_WIDTH,
        C_M_AXI_BUSER_WIDTH                => C_M_AXI_BUSER_WIDTH,
        C_USER_VALUE                       => C_USER_VALUE,
        C_PROT_VALUE                       => C_PROT_VALUE,
        C_CACHE_VALUE                      => C_CACHE_VALUE,
        BUS_ADDR_WIDTH                     => C_M_AXI_ADDR_WIDTH,
        BUS_DATA_WIDTH                     => C_M_AXI_DATA_WIDTH,
        USER_LEN_WIDTH                     => USER_LEN_WIDTH,
        MAX_WRITE_BURST_LENGTH             => MAX_WRITE_BURST_LENGTH,
        NUM_WRITE_OUTSTANDING              => NUM_WRITE_OUTSTANDING,
        -- outstanding control for channels
        ID0_NUM_WRITE_OUTSTANDING          => CH0_NUM_WRITE_OUTSTANDING,
        NUM_WRITE_PORTS                    => NUM_WRITE_PORTS)
    port map (
        ACLK                               => ACLK,
        ARESET                             => ARESET,
        ACLK_EN                            => ACLK_EN,
        -- The component uses UNSIGNED; outputs are converted to the entity's
        -- STD_LOGIC_VECTOR ports with a type conversion on the formal side.
        STD_LOGIC_VECTOR(out_BUS_AWID)     => AWID,
        STD_LOGIC_VECTOR(out_BUS_AWADDR)   => AWADDR,
        STD_LOGIC_VECTOR(out_BUS_AWLEN)    => AWLEN,
        STD_LOGIC_VECTOR(out_BUS_AWSIZE)   => AWSIZE,
        STD_LOGIC_VECTOR(out_BUS_AWBURST)  => AWBURST,
        STD_LOGIC_VECTOR(out_BUS_AWLOCK)   => AWLOCK,
        STD_LOGIC_VECTOR(out_BUS_AWCACHE)  => AWCACHE,
        STD_LOGIC_VECTOR(out_BUS_AWPROT)   => AWPROT,
        STD_LOGIC_VECTOR(out_BUS_AWQOS)    => AWQOS,
        STD_LOGIC_VECTOR(out_BUS_AWREGION) => AWREGION,
        STD_LOGIC_VECTOR(out_BUS_AWUSER)   => AWUSER,
        out_BUS_AWVALID                    => AWVALID ,
        in_BUS_AWREADY                     => AWREADY ,
        STD_LOGIC_VECTOR(out_BUS_WID)      => WID,
        STD_LOGIC_VECTOR(out_BUS_WDATA)    => WDATA,
        STD_LOGIC_VECTOR(out_BUS_WSTRB)    => WSTRB,
        out_BUS_WLAST                      => WLAST,
        STD_LOGIC_VECTOR(out_BUS_WUSER)    => WUSER,
        out_BUS_WVALID                     => WVALID ,
        in_BUS_WREADY                      => WREADY,
        -- Vector inputs are converted on the actual side instead.
        in_BUS_BID                         => UNSIGNED(BID),
        in_BUS_BRESP                       => UNSIGNED(BRESP),
        in_BUS_BUSER                       => UNSIGNED(BUSER),
        in_BUS_BVALID                      => BVALID,
        out_BUS_BREADY                     => BREADY ,
        -- internal request / data / response interface
        in_AXI_AWID                        => local_AXI_AWID,
        in_AXI_AWVALID                     => local_AXI_AWVALID,
        out_AXI_AWREADY                    => local_AXI_AWREADY,
        in_AXI_AWADDR                      => local_AXI_AWADDR,
        in_AXI_AWLEN                       => local_AXI_AWLEN,
        out_AXI_WID                        => local_AXI_WID,
        in_AXI_WVALID                      => local_AXI_WVALID,
        out_AXI_WREADY                     => local_AXI_WREADY,
        in_AXI_WSTRB                       => local_AXI_WSTRB,
        in_AXI_WDATA                       => local_AXI_WDATA,
        out_AXI_BVALID                     => local_AXI_BVALID,
        in_AXI_BREADY                      => local_AXI_BREADY,
        -- burst length handshake with the store unit
        out_BURST_AWLEN                    => local_BURST_AWLEN,
        out_BURST_AWVALID                  => local_BURST_AWVALID,
        in_BURST_WID                       => local_BURST_WID,
        in_BURST_WLEN                      => local_BURST_WLEN,
        in_BURST_WVALID                    => local_BURST_WVALID,
        out_BURST_WREADY                   => local_BURST_WREADY
        );

    -- s2mm_gmem_m_axi_read 

end architecture behave;

-- 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689

library IEEE;
use IEEE.STD_LOGIC_1164.all;
use IEEE.NUMERIC_STD.all;

-- Store unit entity: takes write requests, write data and response
-- handshakes on the HLS side (in_HLS_* / out_HLS_*) and presents write
-- requests, data and burst-length information on the bus-write side
-- (out_AXI_* / in_AXI_* / *_BURST_*).
-- The component declaration of the same name in s2mm_gmem_m_axi must match
-- this interface.
entity s2mm_gmem_m_axi_store is
    generic (
        -- non-zero selects the conservative_gen branch of the architecture,
        -- zero selects aggressive_gen
        CONSERVATIVE           : INTEGER := 0;
        -- base address added to every request address (after alignment to
        -- the user word size)
        C_TARGET_ADDR          : INTEGER := 16#00000000#;
        NUM_WRITE_OUTSTANDING  : INTEGER := 2;
        MAX_WRITE_BURST_LENGTH : INTEGER := 16;
        BUS_ADDR_WIDTH         : INTEGER := 32;
        BUS_DATA_WIDTH         : INTEGER := 32;
        -- user data / address / length widths on the HLS side
        USER_DW                : INTEGER := 16;
        USER_AW                : INTEGER := 32;
        USER_LEN_WIDTH         : INTEGER := 32;
        -- depth of the write request FIFO
        USER_MAXREQS           : INTEGER := 16;
        BUFFER_IMPL            : STRING  := "auto"); 
    port (
        ACLK                   : in  STD_LOGIC;
        ARESET                 : in  STD_LOGIC;
        ACLK_EN                : in  STD_LOGIC;
        -- write request toward the bus write unit (byte address, byte length)
        out_AXI_AWADDR         : out UNSIGNED(BUS_ADDR_WIDTH-1 downto 0);
        out_AXI_AWLEN          : out UNSIGNED(USER_LEN_WIDTH-1 downto 0);
        out_AXI_AWVALID        : out STD_LOGIC;
        in_AXI_AWREADY         : in  STD_LOGIC;
        -- burst length handshake with the bus write unit
        in_BURST_AWLEN         : in  UNSIGNED(7 downto 0);
        in_BURST_AWVALID       : in  STD_LOGIC;
        out_BURST_WLEN         : out UNSIGNED(7 downto 0);
        out_BURST_WVALID       : out STD_LOGIC;
        in_BURST_WREADY        : in  STD_LOGIC;
        -- write data toward the bus write unit
        out_AXI_WDATA          : out UNSIGNED(BUS_DATA_WIDTH-1 downto 0);
        out_AXI_WSTRB          : out UNSIGNED(BUS_DATA_WIDTH/8-1 downto 0);
        out_AXI_WVALID         : out STD_LOGIC;
        in_AXI_WREADY          : in  STD_LOGIC;
        -- write response from the bus write unit
        in_AXI_BVALID          : in  STD_LOGIC;
        out_AXI_BREADY         : out STD_LOGIC;
        -- HLS side write request / data / response ports
        in_HLS_AWADDR          : in  UNSIGNED(USER_AW-1 downto 0);
        in_HLS_AWLEN           : in  UNSIGNED(USER_LEN_WIDTH-1 downto 0); 
        in_HLS_AWVALID         : in  STD_LOGIC;
        out_HLS_AWREADY        : out STD_LOGIC;
        in_HLS_WDATA           : in  UNSIGNED(USER_DW-1 downto 0);
        in_HLS_WSTRB           : in  UNSIGNED(USER_DW/8-1 downto 0);
        in_HLS_WVALID          : in  STD_LOGIC;
        out_HLS_WREADY         : out STD_LOGIC;
        out_HLS_BVALID         : out STD_LOGIC;
        in_HLS_BREADY          : in  STD_LOGIC);
end entity s2mm_gmem_m_axi_store;

architecture behave of s2mm_gmem_m_axi_store is
    ------------------------Task and function--------------
    -- Returns the larger of x and y (y when both are equal).
    function max (x : INTEGER;
                  y : INTEGER) return INTEGER is
    begin
        if x > y then
            return x;
        end if;
        return y;
    end function max;

    -- Rounds x up to the next width of the form 8 * 2**k (8, 16, 32, ...);
    -- any x of 8 or less yields 8.
    function calc_data_width (x : INTEGER) return INTEGER is
        variable width : INTEGER := 8;
    begin
        -- keep doubling until the candidate width covers x
        while width < x loop
            width := 2 * width;
        end loop;
        return width;
    end function calc_data_width;

    -- Ceiling of log2(x): the smallest n with 2**n >= x.
    -- Any x of 1 or less yields 0.
    function log2 (x : INTEGER) return INTEGER is
        variable bits : INTEGER := 0;  -- exponent found so far
        variable span : INTEGER := 1;  -- always equals 2**bits
    begin
        while span < x loop
            span := 2 * span;
            bits := bits + 1;
        end loop;
        return bits;
    end function log2;

    -- Integer selector: returns x when cond is true, otherwise y.
    function cond_sel (cond : BOOLEAN; x : INTEGER; y : INTEGER) return INTEGER is
    begin
        if cond then
            return x;
        end if;
        return y;
    end function cond_sel;
    ------------------------Parameter----------------------
    constant USER_DATA_WIDTH  : INTEGER := calc_data_width(USER_DW);
    constant USER_DATA_BYTES  : INTEGER := USER_DATA_WIDTH / 8;
    constant USER_DATA_ALIGN  : INTEGER := USER_DATA_WIDTH;
    constant USER_ADDR_ALIGN  : INTEGER := log2(USER_DATA_BYTES);
    constant WREQ_PACK_WIDTH  : INTEGER := USER_AW + USER_LEN_WIDTH;
    constant BUS_DATA_BYTES   : INTEGER := BUS_DATA_WIDTH / 8;
    constant BUS_ADDR_ALIGN   : INTEGER := log2(BUS_DATA_BYTES);
    -- wdata buffer size 
    constant WBUFF_DEPTH      : INTEGER := cond_sel((USER_DATA_ALIGN = BUS_DATA_WIDTH), 
                                                    2 * MAX_WRITE_BURST_LENGTH, MAX_WRITE_BURST_LENGTH);
    constant PREFERRED_IMPL   : STRING  := "shiftreg" when (WBUFF_DEPTH <= 32) else BUFFER_IMPL;
    constant BURST_LEN_WIDTH  : INTEGER := max(log2(WBUFF_DEPTH + 1), 8);
    constant TARGET_ADDR      : INTEGER := (C_TARGET_ADDR/(2**USER_ADDR_ALIGN))*(2**USER_ADDR_ALIGN);
    ------------------------Local signal-------------------
    signal next_wreq      : STD_LOGIC;
    signal ready_for_wreq : STD_LOGIC;
    signal wreq_ready     : STD_LOGIC;
    signal wreq_valid     : STD_LOGIC;
    signal valid_length   : STD_LOGIC;

    signal wreq_addr      : UNSIGNED(USER_AW-1 downto 0);
    signal wreq_len       : UNSIGNED(USER_LEN_WIDTH-1 downto 0);
    signal tmp_addr       : UNSIGNED(BUS_ADDR_WIDTH-1 downto 0);
    signal tmp_len        : UNSIGNED(USER_LEN_WIDTH-1 downto 0);
    signal tmp_valid      : STD_LOGIC;

    signal out_AXI_WDATA_PACK   : UNSIGNED(BUS_DATA_WIDTH+BUS_DATA_BYTES-1 downto 0);
    signal local_AXI_WDATA_PACK : UNSIGNED(BUS_DATA_WIDTH+BUS_DATA_BYTES-1 downto 0);
    signal local_AXI_WVALID : STD_LOGIC;
    signal local_AXI_WREADY : STD_LOGIC;

    signal wrsp_ready     : STD_LOGIC;
    signal wrsp_valid     : STD_LOGIC;
    signal wrsp_read      : STD_LOGIC;
    signal wrsp_type      : UNSIGNED(0 downto 0);
    signal in_wrsp_type   : UNSIGNED(0 downto 0);
    signal ursp_ready     : STD_LOGIC;
    signal ursp_write     : STD_LOGIC;

    signal in_wreq_pack   : UNSIGNED(WREQ_PACK_WIDTH-1 downto 0);
    signal out_wreq_pack  : UNSIGNED(WREQ_PACK_WIDTH-1 downto 0);
    -- regslice io ?  no 
    -- enable regslice on W channel  no 
    component s2mm_gmem_m_axi_fifo is
        generic (
            MEM_STYLE         : STRING  := "shiftreg";
            DATA_WIDTH        : INTEGER := 8;
            ADDR_WIDTH        : INTEGER := 4;
            DEPTH             : INTEGER := 16);
        port (
            clk               : in  STD_LOGIC;
            reset             : in  STD_LOGIC;
            clk_en            : in  STD_LOGIC;
            if_full_n         : out STD_LOGIC;
            if_write          : in  STD_LOGIC;
            if_din            : in  UNSIGNED(DATA_WIDTH-1 downto 0);
            if_empty_n        : out STD_LOGIC;
            if_read           : in  STD_LOGIC;
            if_dout           : out UNSIGNED(DATA_WIDTH-1 downto 0);
            if_num_data_valid : out UNSIGNED(ADDR_WIDTH downto 0));
    end component s2mm_gmem_m_axi_fifo;


begin
    -- Instantiation

    -- Write request FIFO. Each entry is one packed request (in_wreq_pack:
    -- length and address). The HLS side writes with in_HLS_AWVALID and sees
    -- the not-full flag as out_HLS_AWREADY; the address preprocessor pops an
    -- entry with next_wreq. MEM_STYLE is left at the component default.
    fifo_wreq : s2mm_gmem_m_axi_fifo
    generic map (
        DATA_WIDTH        => WREQ_PACK_WIDTH,
        ADDR_WIDTH        => log2(USER_MAXREQS),
        DEPTH             => USER_MAXREQS)
    port map (
        clk               => ACLK,
        reset             => ARESET,
        clk_en            => ACLK_EN,
        if_full_n         => out_HLS_AWREADY,
        if_write          => in_HLS_AWVALID,
        if_din            => in_wreq_pack,
        if_empty_n        => wreq_valid,
        if_read           => next_wreq,
        if_dout           => out_wreq_pack,
        -- fill level is not used
        if_num_data_valid => open);

    -- ===================================================================
    -- start of AWADDR PREPROCESSOR
    -- Request packing: length in the upper USER_LEN_WIDTH bits, address in
    -- the lower USER_AW bits; the two slices below undo the packing on the
    -- FIFO output.
    in_wreq_pack    <= in_HLS_AWLEN & in_HLS_AWADDR;
    wreq_len        <= out_wreq_pack(USER_AW + USER_LEN_WIDTH-1 downto USER_AW);
    wreq_addr       <= out_wreq_pack(USER_AW - 1 downto 0);
    -- Pop a request when the FIFO holds one, the address stage can take it
    -- and wrsp_ready is high.
    -- NOTE(review): wrsp_ready presumably means the response tracker has
    -- room -- confirm against its driver.
    next_wreq       <= wreq_valid and ready_for_wreq and wrsp_ready;
    -- The address stage is free when it holds no valid request or when the
    -- current one is being accepted in this cycle.
    ready_for_wreq  <= not tmp_valid or (in_AXI_AWREADY and wreq_ready);
    -- A length is valid when it is non-zero and its most significant bit is
    -- clear.
    valid_length    <= '1' when wreq_len /= 0 and wreq_len(USER_LEN_WIDTH-1) = '0' else '0';
   
    out_AXI_AWLEN   <= tmp_len;   -- Byte length
    out_AXI_AWADDR  <= tmp_addr;  -- Byte address
    -- The request is only presented downstream while wreq_ready is high.
    out_AXI_AWVALID <= tmp_valid and wreq_ready;

    -- Address / length register stage.
    -- Synchronous active-high reset clears both registers; updates happen
    -- only while ACLK_EN is high. On every popped request (next_wreq):
    --   tmp_addr = TARGET_ADDR + (wreq_addr resized to the bus address
    --              width, shifted left by USER_ADDR_ALIGN)
    --   tmp_len  = (wreq_len shifted left by USER_ADDR_ALIGN) - 1
    -- USER_ADDR_ALIGN is log2 of the user word size in bytes, so the shifts
    -- scale word counts to bytes. Both registers load even when the request
    -- length is not valid; only tmp_valid is gated by valid_length.
    process (ACLK)
    begin
        if (ACLK'event and ACLK = '1') then
            if (ARESET = '1') then
                tmp_addr  <= (others => '0');
                tmp_len   <= (others => '0');
            elsif ACLK_EN = '1' then
                if (next_wreq = '1') then
                    tmp_addr  <= TARGET_ADDR + SHIFT_LEFT(RESIZE(wreq_addr, BUS_ADDR_WIDTH), USER_ADDR_ALIGN);
                    tmp_len   <= SHIFT_LEFT(wreq_len, USER_ADDR_ALIGN) - 1;
                end if;
            end if;
        end if;
    end process;

    -- Valid flag of the address / length register stage.
    -- Synchronous active-high reset clears it; updates happen only while
    -- ACLK_EN is high. It is set when a request with a valid length is
    -- popped, and cleared when the held request is accepted (in_AXI_AWREADY
    -- and wreq_ready both high). Setting takes priority over clearing, so
    -- the flag stays high on back-to-back requests. A popped request with an
    -- invalid length never sets the flag, so it is never presented
    -- downstream.
    process (ACLK)
    begin
        if (ACLK'event and ACLK = '1') then
            if (ARESET = '1') then
                tmp_valid  <= '0';
            elsif ACLK_EN = '1' then
                if (next_wreq and valid_length) = '1' then
                    tmp_valid <= '1';
                elsif (in_AXI_AWREADY and wreq_ready) = '1' then
                    tmp_valid <= '0';
                end if;
            end if;
        end if;
    end process;

    -- end of AWADDR PREPROCESSOR
    -- ===================================================================
    -- ===================================================================
    -- start of WRITE BURST throttling control

    -- CONSERVATIVE = 0: no write-burst throttling is generated, so the
    -- burst W-side outputs are tied to constant zero.
    aggressive_gen : if CONSERVATIVE = 0 generate
        out_BURST_WVALID <= '0';
        out_BURST_WLEN   <= (others => '0');
    end generate aggressive_gen;
    
    -- CONSERVATIVE /= 0: write-burst throttling. Burst lengths are queued in
    -- fifo_burst and released on the BURST W interface only after the beat
    -- counter shows enough W beats have been accepted.
    conservative_gen : if (CONSERVATIVE /= 0) generate
        -- Registered copies that drive out_BURST_WLEN / out_BURST_WVALID.
        signal local_BURST_WLEN   : UNSIGNED(7 downto 0);
        signal local_BURST_WVALID : STD_LOGIC;
        -- next_burst pops fifo_burst; burst_len / burst_valid are its output side.
        signal next_burst         : STD_LOGIC;
        signal burst_len          : UNSIGNED(7 downto 0);
        signal burst_valid        : STD_LOGIC;
        signal burst_ready        : STD_LOGIC;
        -- High when a W beat is accepted (local_AXI_WVALID and local_AXI_WREADY).
        signal beat_write         : STD_LOGIC;
        -- Candidate next values of num_beat_cnt, one bit wider than the counter
        -- so the top bit of the subtracting candidates can be inspected.
        -- Suffix = (next_burst, beat_write).
        signal num_beat_pred_br11 : UNSIGNED(BURST_LEN_WIDTH downto 0);
        signal num_beat_pred_br10 : UNSIGNED(BURST_LEN_WIDTH downto 0);
        signal num_beat_pred_br01 : UNSIGNED(BURST_LEN_WIDTH downto 0);
        signal num_beat_pred_br00 : UNSIGNED(BURST_LEN_WIDTH downto 0);
        -- Selected next value and the counter register itself.
        signal num_beat_pred      : UNSIGNED(BURST_LEN_WIDTH-1 downto 0);
        signal num_beat_cnt       : UNSIGNED(BURST_LEN_WIDTH-1 downto 0);
    begin
        
        -- Queue of pending burst lengths (8 bits each), one slot per
        -- outstanding write. The full flag is not observed here.
        fifo_burst : s2mm_gmem_m_axi_fifo
        generic map (
            DEPTH             => NUM_WRITE_OUTSTANDING,
            ADDR_WIDTH        => log2(NUM_WRITE_OUTSTANDING),
            DATA_WIDTH        => 8)
        port map (
            -- clocking
            clk               => ACLK,
            clk_en            => ACLK_EN,
            reset             => ARESET,
            -- push side: one entry per issued burst address
            if_write          => in_BURST_AWVALID,
            if_din            => in_BURST_AWLEN,
            if_full_n         => open,
            -- pop side: consumed by the throttling logic
            if_read           => next_burst,
            if_dout           => burst_len,
            if_empty_n        => burst_valid,
            if_num_data_valid => open);
        
        out_BURST_WLEN        <= local_BURST_WLEN;
        out_BURST_WVALID      <= local_BURST_WVALID;
        -- The output register can take a new burst when it is empty or is
        -- being accepted downstream in this cycle.
        burst_ready           <= (not local_BURST_WVALID) or in_BURST_WREADY;
        -- Release a burst only when (num_beat_cnt - burst_len - 1) does not
        -- borrow, i.e. at least burst_len + 1 beats have been counted.
        -- NOTE(review): presumably burst_len uses the AXI "beats minus one"
        -- encoding; confirm against the producer of in_BURST_AWLEN.
        next_burst            <= burst_valid and burst_ready and (not num_beat_pred_br10(BURST_LEN_WIDTH));
        beat_write            <= local_AXI_WVALID and local_AXI_WREADY;

        -- Candidate next counter values for each (next_burst, beat_write) pair:
        --   11: release and accept in the same cycle -> cnt - burst_len
        --   10: release only                         -> cnt - burst_len - 1
        --   01: accept only                          -> cnt + 1
        --   00: idle                                 -> cnt
        num_beat_pred_br11    <= ('0' & num_beat_cnt) - burst_len;
        num_beat_pred_br10    <= ('0' & num_beat_cnt) - burst_len - 1;
        num_beat_pred_br01    <= ('0' & num_beat_cnt) + 1;
        num_beat_pred_br00    <= ('0' & num_beat_cnt);
        -- The extra top bit of each candidate is dropped when selecting.
        num_beat_pred         <= num_beat_pred_br11(BURST_LEN_WIDTH-1 downto 0) when (next_burst = '1' and beat_write = '1') else
                                 num_beat_pred_br10(BURST_LEN_WIDTH-1 downto 0) when (next_burst = '1' and beat_write = '0') else
                                 num_beat_pred_br01(BURST_LEN_WIDTH-1 downto 0) when (next_burst = '0' and beat_write = '1') else
                                 num_beat_pred_br00(BURST_LEN_WIDTH-1 downto 0);

        -- Output length register: captures the FIFO head whenever a burst
        -- is released; holds otherwise. Synchronous reset to zero.
        process (ACLK)
        begin
            if ACLK'event and ACLK = '1' then
                if ARESET = '1' then
                    local_BURST_WLEN <= (others => '0');
                elsif (ACLK_EN = '1') and (next_burst = '1') then
                    local_BURST_WLEN <= burst_len;
                end if;
            end if;
        end process;

        -- Output valid flag: raised when a burst is released, dropped when
        -- the downstream side signals ready. A release in the same cycle as
        -- a ready keeps the flag high (release has priority).
        process (ACLK)
        begin
            if ACLK'event and ACLK = '1' then
                if ARESET = '1' then
                    local_BURST_WVALID <= '0';
                else
                    if ACLK_EN = '1' then
                        if next_burst = '1' then
                            local_BURST_WVALID <= '1';
                        elsif in_BURST_WREADY = '1' then
                            local_BURST_WVALID <= '0';
                        end if;
                    end if;
                end if;
            end if;
        end process;
    
        -- Beat counter register: loads the combinationally selected
        -- num_beat_pred on every enabled clock. Synchronous reset to zero.
        process (ACLK)
        begin
            if ACLK'event and ACLK = '1' then
                if ARESET = '1' then
                    num_beat_cnt <= (others => '0');
                else
                    if ACLK_EN = '1' then
                        num_beat_cnt <= num_beat_pred;
                    end if;
                end if;
            end if;
        end process;
    
    end generate conservative_gen;

    -- end of WRITE BURST throttling control
    -- ===================================================================
    -- W channel regslice 

    -- W channel fifo
    -- W-channel buffer: holds bus-width data words together with their
    -- byte strobes (BUS_DATA_WIDTH + BUS_DATA_BYTES bits per entry).
    buff_wdata : s2mm_gmem_m_axi_fifo
    generic map (
        DEPTH             => WBUFF_DEPTH,
        ADDR_WIDTH        => log2(WBUFF_DEPTH),
        DATA_WIDTH        => BUS_DATA_WIDTH + BUS_DATA_BYTES,
        MEM_STYLE         => PREFERRED_IMPL)
    port map (
        -- clocking
        clk               => ACLK,
        clk_en            => ACLK_EN,
        reset             => ARESET,
        -- push side: packed beats from the WDATA preprocessor
        if_write          => local_AXI_WVALID,
        if_din            => local_AXI_WDATA_PACK,
        if_full_n         => local_AXI_WREADY,
        -- pop side: drives the outgoing W channel
        if_read           => in_AXI_WREADY,
        if_dout           => out_AXI_WDATA_PACK,
        if_empty_n        => out_AXI_WVALID,
        if_num_data_valid => open);

    -- ===================================================================
    -- start of WDATA PREPROCESSOR
    -- Split the buffered pack: data in the low BUS_DATA_WIDTH bits,
    -- byte strobes in the BUS_DATA_BYTES bits above it.
    out_AXI_WDATA         <= out_AXI_WDATA_PACK(BUS_DATA_WIDTH-1 downto 0);
    out_AXI_WSTRB         <= out_AXI_WDATA_PACK(BUS_DATA_WIDTH+BUS_DATA_BYTES-1 downto BUS_DATA_WIDTH);
    -- User word and bus word have the same size: user beats pass straight
    -- through to the W buffer and the AW side is never stalled from here.
    bus_equal_gen : if USER_DATA_ALIGN = BUS_DATA_WIDTH generate
        wreq_ready           <= '1';
        out_HLS_WREADY       <= local_AXI_WREADY;
        local_AXI_WVALID     <= in_HLS_WVALID;
        local_AXI_WDATA_PACK <= RESIZE(in_HLS_WSTRB, BUS_DATA_BYTES) & RESIZE(in_HLS_WDATA, BUS_DATA_WIDTH);
    end generate bus_equal_gen;

    -- Bus word wider than the user word: several user words ("pads") are
    -- gathered into one bus beat, with head/tail lanes blanked according to
    -- the request's start and end alignment.
    bus_wide_gen : if (USER_DATA_ALIGN < BUS_DATA_WIDTH) generate
        -- Number of user-word lanes in one bus word, and its log2.
        constant TOTAL_PADS     : INTEGER := BUS_DATA_WIDTH / USER_DATA_ALIGN;
        constant PAD_ALIGN      : INTEGER := log2(TOTAL_PADS);
        -- Width of a length expressed in bus beats.
        constant BEAT_LEN_WIDTH : INTEGER := USER_LEN_WIDTH - BUS_ADDR_ALIGN;
        -- Descriptor = start lane & end lane field & beat count.
        constant OFFSET_PACK_WIDTH : INTEGER := 2*PAD_ALIGN + BEAT_LEN_WIDTH;
        -- Depth of the user-side input buffer, in user words.
        constant WBUFF_IN_DEPTH : INTEGER := MAX_WRITE_BURST_LENGTH * TOTAL_PADS;

        -- Thermometer decoder: returns a TOTAL_PADS-bit mask whose lowest
        -- `din` bits are '1' and all others '0' (din = 0 yields all zeros).
        function decoder (din : UNSIGNED(PAD_ALIGN-1 downto 0)) return UNSIGNED is
            constant ones_cnt : NATURAL := TO_INTEGER(din);
            variable mask     : UNSIGNED(TOTAL_PADS-1 downto 0) := (others => '0');
        begin
            for lane in mask'range loop
                if lane < ones_cnt then
                    mask(lane) := '1';
                end if;
            end loop;
            return mask;
        end function decoder;

        -- Low address bits of the last byte of the request.
        signal  tmp_addr_end    : UNSIGNED(BUS_ADDR_ALIGN-1 downto 0);
        -- Handshake of the wreq_offset descriptor FIFO and of the holding
        -- register (offset_pack_buf / offset_valid) behind it.
        signal  offset_full_n   : STD_LOGIC;
        signal  offset_write    : STD_LOGIC;
        signal  offset_empty_n  : STD_LOGIC;
        signal  offset_read     : STD_LOGIC;
        signal  offset_valid    : STD_LOGIC;
        signal  next_offset     : STD_LOGIC;

        -- Descriptor at FIFO input, FIFO output, and in the holding register.
        signal  offset_pack_in  : UNSIGNED(OFFSET_PACK_WIDTH-1 downto 0);
        signal  offset_pack_out : UNSIGNED(OFFSET_PACK_WIDTH-1 downto 0);
        signal  offset_pack_buf : UNSIGNED(OFFSET_PACK_WIDTH-1 downto 0);
        -- start/end: computed from tmp_addr/tmp_len; head/tail: the same
        -- fields read back from the holding register.
        signal  start_offset    : UNSIGNED(PAD_ALIGN-1 downto 0);
        signal  end_offset      : UNSIGNED(PAD_ALIGN-1 downto 0);
        signal  head_offset     : UNSIGNED(PAD_ALIGN-1 downto 0);
        signal  tail_offset     : UNSIGNED(PAD_ALIGN-1 downto 0);
        -- Beat-count fields and the running beat counter.
        signal  align_len       : UNSIGNED(BEAT_LEN_WIDTH-1 downto 0);
        signal  wdata_len       : UNSIGNED(BEAT_LEN_WIDTH-1 downto 0);
        signal  total_len       : UNSIGNED(BEAT_LEN_WIDTH-1 downto 0);
        signal  beat_len        : UNSIGNED(BEAT_LEN_WIDTH-1 downto 0);
        signal  beat_len_cnt    : UNSIGNED(BEAT_LEN_WIDTH-1 downto 0);

        -- Bus-width assembly registers feeding local_AXI_WDATA_PACK.
        signal  data_buf        : UNSIGNED(BUS_DATA_WIDTH-1 downto 0);
        signal  strb_buf        : UNSIGNED(BUS_DATA_BYTES-1 downto 0);
        signal  data_valid      : STD_LOGIC;

        -- Current user word and strobes, resized to the lane width.
        signal  tmp_wdata       : UNSIGNED(USER_DATA_WIDTH-1 downto 0);
        signal  tmp_wstrb       : UNSIGNED(USER_DATA_BYTES-1 downto 0);

        -- Push side of the user-word input buffer (buff_wdata_in).
        signal  in_wdata_pack   : UNSIGNED(USER_DW+USER_DW/8-1 downto 0);
        signal  in_wdata_vld    : STD_LOGIC;
        signal  out_wdata_rdy   : STD_LOGIC;
        
        -- Pop side of the user-word input buffer.
        signal  local_HLS_WDATA_PACK : UNSIGNED(USER_DW+USER_DW/8-1 downto 0);
        signal  local_HLS_WVALID : STD_LOGIC;
        signal  local_HLS_WREADY : STD_LOGIC;

        -- Per-lane controls. tail_pad_sel has an ascending range, so the
        -- decoder mask is mirrored onto the upper lanes when indexed by lane.
        signal  add_head        : UNSIGNED(TOTAL_PADS-1 downto 0); 
        signal  add_tail        : UNSIGNED(TOTAL_PADS-1 downto 0); 
        signal  head_pad_sel    : UNSIGNED(TOTAL_PADS-1 downto 0); 
        signal  tail_pad_sel    : UNSIGNED(0 to TOTAL_PADS-1);
        -- One-hot select of the lane receiving the current user word.
        signal  pad_oh          : UNSIGNED(TOTAL_PADS - 1 downto 0);
        signal  pad_oh_reg      : UNSIGNED(TOTAL_PADS - 1 downto 0);
        signal  first_pad       : BOOLEAN;
        signal  next_pad        : BOOLEAN;
        signal  last_pad        : BOOLEAN;

        -- Beat-level state and predicates.
        signal  first_beat_set  : BOOLEAN;
        signal  last_beat_set   : BOOLEAN;
        signal  last_beat_pred  : BOOLEAN;
        signal  single_beat_pred: BOOLEAN;
        signal  single_beat     : BOOLEAN;
        signal  ready_for_data  : BOOLEAN;
        signal  first_beat      : BOOLEAN;
        signal  next_beat       : BOOLEAN;
        signal  last_beat       : BOOLEAN;
    begin
        -- Descriptor FIFO: one (start lane, end lane field, beat count)
        -- entry per write request handed to the AW channel.
        wreq_offset : s2mm_gmem_m_axi_fifo
        generic map (
            DEPTH             => NUM_WRITE_OUTSTANDING,
            ADDR_WIDTH        => log2(NUM_WRITE_OUTSTANDING),
            DATA_WIDTH        => OFFSET_PACK_WIDTH)
        port map (
            -- clocking
            clk               => ACLK,
            clk_en            => ACLK_EN,
            reset             => ARESET,
            -- push side
            if_write          => offset_write,
            if_din            => offset_pack_in,
            if_full_n         => offset_full_n,
            -- pop side
            if_read           => offset_read,
            if_dout           => offset_pack_out,
            if_empty_n        => offset_empty_n,
            if_num_data_valid => open);

        -- User-side input buffer: stores user words with their strobes
        -- (USER_DW + USER_DW/8 bits) until they are packed into bus beats.
        buff_wdata_in : s2mm_gmem_m_axi_fifo
        generic map (
            DEPTH             => WBUFF_IN_DEPTH,
            ADDR_WIDTH        => log2(WBUFF_IN_DEPTH),
            DATA_WIDTH        => USER_DW + USER_DW/8)
        port map (
            -- clocking
            clk               => ACLK,
            clk_en            => ACLK_EN,
            reset             => ARESET,
            -- push side: user write data
            if_write          => in_wdata_vld,
            if_din            => in_wdata_pack,
            if_full_n         => out_wdata_rdy,
            -- pop side: consumed by the lane packer
            if_read           => local_HLS_WREADY,
            if_dout           => local_HLS_WDATA_PACK,
            if_empty_n        => local_HLS_WVALID,
            if_num_data_valid => open);
        
        -- The AW handshake is blocked only when a descriptor write is
        -- attempted while the descriptor FIFO is full.
        wreq_ready       <= '1' when offset_full_n = '1' or offset_write = '0' else '0';
        -- tmp_len holds "bytes minus one", so this is the in-word byte
        -- offset of the last byte of the request.
        tmp_addr_end     <= tmp_addr(BUS_ADDR_ALIGN-1 downto 0) + tmp_len(BUS_ADDR_ALIGN-1 downto 0);
        -- Lane index of the first user word inside the first bus beat.
        start_offset     <= tmp_addr(BUS_ADDR_ALIGN-1 downto USER_ADDR_ALIGN);
        -- Bitwise complement of the last lane index; with TOTAL_PADS a power
        -- of two this equals the number of unused lanes after the last word.
        end_offset       <= not(tmp_addr_end(BUS_ADDR_ALIGN-1 downto USER_ADDR_ALIGN));
        -- Number of bus beats minus one covered by the request.
        align_len        <= RESIZE(SHIFT_RIGHT(tmp_len + tmp_addr(BUS_ADDR_ALIGN-1 downto 0), BUS_ADDR_ALIGN), BEAT_LEN_WIDTH);
        offset_write     <= tmp_valid and in_AXI_AWREADY;
        -- Pop a descriptor when the holding register is empty or the
        -- current descriptor finishes in this cycle.
        offset_read      <= not offset_valid or next_offset;

        offset_pack_in   <= start_offset & end_offset & align_len;
        -- Fields of the descriptor currently held in offset_pack_buf.
        head_offset      <= offset_pack_buf(OFFSET_PACK_WIDTH-1 downto BEAT_LEN_WIDTH+PAD_ALIGN);
        tail_offset      <= offset_pack_buf(BEAT_LEN_WIDTH+PAD_ALIGN-1 downto BEAT_LEN_WIDTH);
        total_len        <= offset_pack_buf(BEAT_LEN_WIDTH-1 downto 0);
        -- Beat count of the descriptor at the FIFO output (not yet latched);
        -- used for single_beat_pred.
        wdata_len        <= offset_pack_out(BEAT_LEN_WIDTH-1 downto 0);

        in_wdata_pack     <= in_HLS_WSTRB & in_HLS_WDATA;
        in_wdata_vld      <= in_HLS_WVALID;
        out_HLS_WREADY    <= out_wdata_rdy;
        local_AXI_WDATA_PACK <= strb_buf & data_buf;
        local_AXI_WVALID <= data_valid;
        -- User words are consumed only while a descriptor is held and the
        -- assembly register can be written.
        local_HLS_WREADY <= '1' when offset_valid = '1' and ready_for_data else '0';

        tmp_wdata        <= RESIZE(local_HLS_WDATA_PACK(USER_DW-1 downto 0), USER_DATA_WIDTH);
        tmp_wstrb        <= RESIZE(local_HLS_WDATA_PACK(USER_DW+USER_DW/8-1 downto USER_DW), USER_DATA_BYTES);

        -- The descriptor is finished when its last beat completes.
        next_offset      <= '1' when last_beat and next_beat else '0';
        -- The assembly register is writable when empty or being drained.
        ready_for_data   <= data_valid = '0' or local_AXI_WREADY = '1';

        -- Remaining-beat value: the descriptor's count on the first beat,
        -- the running counter afterwards.
        beat_len         <= total_len when first_beat else beat_len_cnt;
        first_beat       <= first_beat_set and (offset_valid = '1');
        last_beat        <= (single_beat or last_beat_set) and (offset_valid = '1');
        -- A beat completes when the word filling its final lane is consumed.
        next_beat        <= local_HLS_WREADY = '1' and last_pad;

        single_beat_pred <= (wdata_len = 0);
        last_beat_pred   <= (beat_len = 1);

        next_pad         <= local_HLS_WREADY = '1' and local_HLS_WVALID = '1';
        -- Final lane of a beat: the top lane normally, or the lane just
        -- below the tail padding on the last beat.
        last_pad         <= pad_oh(TOTAL_PADS - to_integer(tail_offset) - 1) = '1' when last_beat else
                            pad_oh(TOTAL_PADS - 1) = '1';
        head_pad_sel     <= decoder(head_offset);
        tail_pad_sel     <= decoder(tail_offset);
        -- One-hot lane select: all zeros without input data; on the first
        -- word of a beat it starts at head_offset (first beat) or lane 0;
        -- otherwise it follows the shifted register.
        pad_oh           <= (others => '0')                                                 when local_HLS_WVALID = '0' else
                            SHIFT_LEFT(TO_UNSIGNED(1, TOTAL_PADS), TO_INTEGER(head_offset)) when first_beat and first_pad else
                            TO_UNSIGNED(1, TOTAL_PADS)                                      when first_pad else
                            pad_oh_reg;

        -- Descriptor holding register: when an entry is popped from the
        -- descriptor FIFO, latch it together with its single-beat flag.
        process (ACLK)
        begin
            if ACLK'event and ACLK = '1' then
                if ARESET = '1' then
                    offset_pack_buf <= (others => '0');
                    single_beat     <= false;
                elsif ACLK_EN = '1' and offset_empty_n = '1' and offset_read = '1' then
                    offset_pack_buf <= offset_pack_out;
                    single_beat     <= single_beat_pred;
                end if;
            end if;
        end process;

        -- Valid flag of the descriptor holding register: set when a
        -- descriptor is popped, cleared when the held one finishes and no
        -- replacement is popped in the same cycle.
        process (ACLK)
        begin
            if ACLK'event and ACLK = '1' then
                if ARESET = '1' then
                    offset_valid <= '0';
                else
                    if ACLK_EN = '1' then
                        if (offset_empty_n = '1') and (offset_read = '1') then
                            offset_valid <= '1';
                        elsif next_offset = '1' then
                            offset_valid <= '0';
                        end if;
                    end if;
                end if;
            end if;
        end process;

        -- First/last beat markers.
        -- When a descriptor finishes they return to their reset state
        -- (first = true, last = false); on any other completed beat "first"
        -- drops and "last" takes the prediction for the following beat.
        process (ACLK)
        begin
            if ACLK'event and ACLK = '1' then
                if ARESET = '1' then
                    last_beat_set  <= false;
                    first_beat_set <= true;
                elsif ACLK_EN = '1' then
                    if (next_offset = '1') or next_beat then
                        first_beat_set <= (next_offset = '1');
                        last_beat_set  <= last_beat_pred and not (next_offset = '1');
                    end if;
                end if;
            end if;
        end process;

        -- Remaining-beat counter: decremented from the current beat_len
        -- each time a bus beat completes.
        process (ACLK)
        begin
            if ACLK'event and ACLK = '1' then
                if ARESET = '1' then
                    beat_len_cnt <= (others => '0');
                elsif (ACLK_EN = '1') and next_beat then
                    beat_len_cnt <= beat_len - 1;
                end if;
            end if;
        end process;

        -- Lane pointer register: after each consumed user word, store the
        -- current one-hot select shifted up by one lane.
        process (ACLK)
        begin
            if ACLK'event and ACLK = '1' then
                if ARESET = '1' then
                    pad_oh_reg <= (others => '0');
                elsif (ACLK_EN = '1') and next_pad then
                    pad_oh_reg <= pad_oh(TOTAL_PADS - 2 downto 0) & '0';
                end if;
            end if;
        end process;

        -- first_pad marks that the next consumed user word starts a new bus
        -- beat. Each consumed word updates it: true after the word that
        -- fills the final lane, false after any other word.
        process (ACLK)
        begin
            if ACLK'event and ACLK = '1' then
                if ARESET = '1' then
                    first_pad <= true;
                elsif (ACLK_EN = '1') and next_pad then
                    first_pad <= last_pad;
                end if;
            end if;
        end process;

        -- Per-lane assembly of the bus beat. For each user-word lane i:
        --   * add_head(i) / add_tail(i) flag lanes that lie before the first
        --     user word of the first beat, or after the last user word of
        --     the last beat; those lanes are written with zero data and
        --     zero strobes.
        --   * otherwise, when pad_oh selects the lane, the current user word
        --     and its strobes are captured.
        -- All writes are qualified by ready_for_data so a beat that has not
        -- yet been handed to the W buffer is never overwritten.
        data_gen : for i in 0 to TOTAL_PADS - 1 generate
        begin
            add_head(i) <= '1' when head_pad_sel(i) = '1' and first_beat else '0';
            add_tail(i) <= '1' when tail_pad_sel(i) = '1' and last_beat else '0';

            -- Data slice of lane i.
            process (ACLK)
            begin
                if (ACLK'event and ACLK = '1') then
                    if (ARESET = '1') then
                        data_buf((i+1)*USER_DATA_WIDTH - 1 downto i*USER_DATA_WIDTH) <= (others => '0');
                    elsif (ACLK_EN = '1') then
                        if (add_head(i) = '1' or add_tail(i) = '1') and ready_for_data then
                            data_buf((i+1)*USER_DATA_WIDTH - 1 downto i*USER_DATA_WIDTH) <= (others => '0');
                        elsif pad_oh(i) = '1' and ready_for_data then
                            data_buf((i+1)*USER_DATA_WIDTH - 1 downto i*USER_DATA_WIDTH) <= tmp_wdata;
                        end if;
                    end if;
                end if;
            end process;

            -- Strobe slice of lane i.
            -- The clock-edge condition is no longer qualified by ACLK_EN, so
            -- the synchronous reset applies regardless of the clock enable,
            -- exactly as in the data slice above.
            process (ACLK)
            begin
                if (ACLK'event and ACLK = '1') then
                    if (ARESET = '1') then
                        strb_buf((i+1)*USER_DATA_BYTES - 1 downto i*USER_DATA_BYTES) <= (others => '0');
                    elsif (ACLK_EN = '1') then
                        if (add_head(i) = '1' or add_tail(i) = '1') and ready_for_data then
                            strb_buf((i+1)*USER_DATA_BYTES - 1 downto i*USER_DATA_BYTES) <= (others => '0');
                        elsif pad_oh(i) = '1' and ready_for_data then
                            strb_buf((i+1)*USER_DATA_BYTES - 1 downto i*USER_DATA_BYTES) <= tmp_wstrb;
                        end if;
                    end if;
                end if;
            end process;
        end generate data_gen;

        -- Valid flag of the assembled bus beat: raised when a beat
        -- completes, dropped once the assembly register is free again
        -- (completion has priority).
        process (ACLK)
        begin
            if ACLK'event and ACLK = '1' then
                if ARESET = '1' then
                    data_valid <= '0';
                else
                    if ACLK_EN = '1' then
                        if next_beat then
                            data_valid <= '1';
                        elsif ready_for_data then
                            data_valid <= '0';
                        end if;
                    end if;
                end if;
            end if;
        end process;

    end generate bus_wide_gen; 

    -- Bus word narrower than the user word: every user word is split into
    -- TOTAL_SPLIT consecutive bus beats.
    bus_narrow_gen : if (USER_DATA_ALIGN > BUS_DATA_WIDTH) generate
        -- Number of bus beats per user word, and its log2.
        constant TOTAL_SPLIT    : INTEGER := USER_DATA_ALIGN / BUS_DATA_WIDTH;
        constant SPLIT_ALIGN    : INTEGER := log2(TOTAL_SPLIT);
        -- Width of a length expressed in bus beats.
        constant BEAT_LEN_WIDTH : INTEGER := USER_LEN_WIDTH - BUS_ADDR_ALIGN;
        -- Depth of the user-side input buffer, in user words (at least 1).
        constant WBUFF_IN_DEPTH : INTEGER := max(MAX_WRITE_BURST_LENGTH / TOTAL_SPLIT, 1);

        -- Handshake of the wreq_offset FIFO. Here offset_valid is the FIFO's
        -- empty_n output and next_offset is its read strobe.
        signal  offset_full_n   : STD_LOGIC;
        signal  offset_write    : STD_LOGIC;
        signal  offset_valid    : STD_LOGIC;
        signal  next_offset     : STD_LOGIC;

        -- Beat count pushed per request, the count at the FIFO output, and
        -- the running count of emitted beats.
        signal  align_len       : UNSIGNED(BEAT_LEN_WIDTH-1 downto 0);
        signal  beat_len        : UNSIGNED(BEAT_LEN_WIDTH-1 downto 0);
        signal  beat_len_cnt    : UNSIGNED(BEAT_LEN_WIDTH-1 downto 0);

        -- User-width shift registers; the low bus-width part is the
        -- current beat.
        signal  data_buf        : UNSIGNED(USER_DATA_WIDTH - 1 downto 0);
        signal  strb_buf        : UNSIGNED(USER_DATA_BYTES - 1 downto 0);
        signal  data_valid      : STD_LOGIC;

        -- Current user word and strobes read from the input buffer.
        signal  tmp_wdata       : UNSIGNED(USER_DATA_WIDTH-1 downto 0);
        signal  tmp_wstrb       : UNSIGNED(USER_DATA_BYTES-1 downto 0);

        -- Push side of the user-word input buffer (buff_wdata_in).
        signal  in_wdata_pack   : UNSIGNED(USER_DW+USER_DW/8-1 downto 0);
        signal  in_wdata_vld    : STD_LOGIC;
        signal  out_wdata_rdy   : STD_LOGIC;
        
        -- Pop side of the user-word input buffer.
        signal  local_HLS_WDATA_PACK : UNSIGNED(USER_DW+USER_DW/8-1 downto 0);
        signal  local_HLS_WVALID : STD_LOGIC;
        signal  local_HLS_WREADY : STD_LOGIC;

        -- Split sequencing: position within the current user word and the
        -- derived step predicates.
        signal  split_cnt       : UNSIGNED(SPLIT_ALIGN - 1 downto 0);
        signal  first_split_pred: BOOLEAN;
        signal  first_split     : BOOLEAN;
        signal  next_split      : BOOLEAN;
        signal  last_split      : BOOLEAN;
        signal  ready_for_data  : BOOLEAN;
    begin
        -- instantiation
        -- Beat-count FIFO: one entry (bus beats minus one) per write
        -- request handed to the AW channel.
        wreq_offset : s2mm_gmem_m_axi_fifo
        generic map (
            DEPTH             => NUM_WRITE_OUTSTANDING,
            ADDR_WIDTH        => log2(NUM_WRITE_OUTSTANDING),
            DATA_WIDTH        => BEAT_LEN_WIDTH)
        port map (
            -- clocking
            clk               => ACLK,
            clk_en            => ACLK_EN,
            reset             => ARESET,
            -- push side
            if_write          => offset_write,
            if_din            => align_len,
            if_full_n         => offset_full_n,
            -- pop side
            if_read           => next_offset,
            if_dout           => beat_len,
            if_empty_n        => offset_valid,
            if_num_data_valid => open);

        -- User-side input buffer: stores user words with their strobes
        -- (USER_DW + USER_DW/8 bits) until they are split into bus beats.
        buff_wdata_in : s2mm_gmem_m_axi_fifo
        generic map (
            DEPTH             => WBUFF_IN_DEPTH,
            ADDR_WIDTH        => log2(WBUFF_IN_DEPTH),
            DATA_WIDTH        => USER_DW + USER_DW/8)
        port map (
            -- clocking
            clk               => ACLK,
            clk_en            => ACLK_EN,
            reset             => ARESET,
            -- push side: user write data
            if_write          => in_wdata_vld,
            if_din            => in_wdata_pack,
            if_full_n         => out_wdata_rdy,
            -- pop side: consumed by the splitter
            if_read           => local_HLS_WREADY,
            if_dout           => local_HLS_WDATA_PACK,
            if_empty_n        => local_HLS_WVALID,
            if_num_data_valid => open);
        
        -- The AW handshake is blocked only when a beat-count write is
        -- attempted while the FIFO is full (same rule as bus_wide_gen).
        -- The previous "full_n and not write" form dropped wreq_ready every
        -- time AWREADY was seen with tmp_valid high, so AWVALID was withdrawn
        -- and tmp_valid could never be cleared.
        wreq_ready       <= '1' when offset_full_n = '1' or offset_write = '0' else '0';
        -- Number of bus beats minus one covered by the request
        -- (tmp_len holds "bytes minus one").
        align_len        <= RESIZE(SHIFT_RIGHT(tmp_len + tmp_addr(BUS_ADDR_ALIGN-1 downto 0), BUS_ADDR_ALIGN), BEAT_LEN_WIDTH);
        offset_write     <= tmp_valid and in_AXI_AWREADY;

        in_wdata_pack    <= in_HLS_WSTRB & in_HLS_WDATA;
        in_wdata_vld     <= in_HLS_WVALID;
        out_HLS_WREADY   <= out_wdata_rdy;
        -- Only the low bus-width part of the shift registers forms the
        -- current beat; the pack is BUS_DATA_BYTES + BUS_DATA_WIDTH bits wide,
        -- so the user-width registers must be sliced before concatenation.
        local_AXI_WDATA_PACK <= strb_buf(BUS_DATA_BYTES-1 downto 0) & data_buf(BUS_DATA_WIDTH-1 downto 0);
        local_AXI_WVALID <= data_valid;
        -- A new user word is fetched only at the start of a split sequence,
        -- while a request is pending and the beat register can be written.
        local_HLS_WREADY <= '1' when first_split_pred and (offset_valid ='1') and ready_for_data else '0';

        tmp_wdata        <= RESIZE(local_HLS_WDATA_PACK(USER_DW-1 downto 0), USER_DATA_WIDTH);
        tmp_wstrb        <= RESIZE(local_HLS_WDATA_PACK(USER_DW+USER_DW/8-1 downto USER_DW), USER_DATA_BYTES);

        -- Retire the request when the beat counter has reached its beat
        -- count and the current user word is on its last split.
        next_offset      <= '1' when (beat_len_cnt = beat_len) and (offset_valid = '1') and last_split else '0';
        -- The beat register is writable when empty or being drained.
        ready_for_data   <= data_valid = '0' or local_AXI_WREADY = '1';

        -- first_split: a user word is loaded; next_split: the loaded word is
        -- shifted to expose its next slice; last_split: final slice reached.
        first_split      <= local_HLS_WVALID = '1' and local_HLS_WREADY ='1';
        next_split       <= not first_split_pred and ready_for_data;
        last_split       <= (split_cnt = (TOTAL_SPLIT - 1)) and ready_for_data;

        -- Split sequencer: split_cnt tracks the position inside the current
        -- user word and first_split_pred marks that the next step loads a
        -- fresh word. The last split rewinds both; any other step advances.
        process (ACLK)
        begin
            if ACLK'event and ACLK = '1' then
                if ARESET = '1' then
                    split_cnt        <= (others => '0');
                    first_split_pred <= true;
                elsif ACLK_EN = '1' then
                    if last_split or first_split or next_split then
                        first_split_pred <= last_split;
                        if last_split then
                            split_cnt <= (others => '0');
                        else
                            split_cnt <= split_cnt + 1;
                        end if;
                    end if;
                end if;
            end if;
        end process;

        -- Emitted-beat counter: cleared when the request is retired,
        -- otherwise incremented on every load or shift step.
        process (ACLK)
        begin
            if ACLK'event and ACLK = '1' then
                if ARESET = '1' then
                    beat_len_cnt <= (others => '0');
                else
                    if ACLK_EN = '1' then
                        if next_offset = '1' then
                            beat_len_cnt <= (others => '0');
                        elsif first_split or next_split then
                            beat_len_cnt <= beat_len_cnt + 1;
                        end if;
                    end if;
                end if;
            end if;
        end process;

        -- Data shift register: loads a full user word on first_split, then
        -- shifts right by one bus word on each next_split so the following
        -- slice appears in the low bits.
        process (ACLK)
        begin
            if ACLK'event and ACLK = '1' then
                if ARESET = '1' then
                    data_buf <= (others => '0');
                else
                    if ACLK_EN = '1' then
                        if first_split then
                            data_buf <= tmp_wdata;
                        elsif next_split then
                            data_buf <= SHIFT_RIGHT(data_buf, BUS_DATA_WIDTH);
                        end if;
                    end if;
                end if;
            end if;
        end process;

        -- Strobe shift register: loads the user word's strobes on
        -- first_split, then shifts right by BUS_DATA_BYTES on each
        -- next_split, in step with data_buf.
        process (ACLK)
        begin
            if ACLK'event and ACLK = '1' then
                if ARESET = '1' then
                    strb_buf <= (others => '0');
                else
                    if ACLK_EN = '1' then
                        if first_split then
                            strb_buf <= tmp_wstrb;
                        elsif next_split then
                            strb_buf <= SHIFT_RIGHT(strb_buf, BUS_DATA_BYTES);
                        end if;
                    end if;
                end if;
            end if;
        end process;

        -- Valid flag of the current beat: raised when a user word is
        -- loaded; dropped when the beat register is free and no shift step
        -- is producing a further slice.
        process (ACLK)
        begin
            if ACLK'event and ACLK = '1' then
                if ARESET = '1' then
                    data_valid <= '0';
                else
                    if ACLK_EN = '1' then
                        if first_split then
                            data_valid <= '1';
                        elsif not next_split and ready_for_data then
                            data_valid <= '0';
                        end if;
                    end if;
                end if;
            end if;
        end process;

    end generate bus_narrow_gen;

    -- ===================================================================
    -- generate response for all request (including request with invalid length)
    -- Response-type queue: one 1-bit entry per accepted write request,
    -- recording whether the request had a valid length.
    fifo_wrsp : s2mm_gmem_m_axi_fifo
    generic map (
        DEPTH             => NUM_WRITE_OUTSTANDING,
        ADDR_WIDTH        => log2(NUM_WRITE_OUTSTANDING),
        DATA_WIDTH        => 1)
    port map (
        -- clocking
        clk               => ACLK,
        clk_en            => ACLK_EN,
        reset             => ARESET,
        -- push side: written as each request is taken
        if_write          => next_wreq,
        if_din            => in_wrsp_type,
        if_full_n         => wrsp_ready,
        -- pop side
        if_read           => wrsp_read,
        if_dout           => wrsp_type,  -- "1" for valid length request, "0" for invalid length request
        if_empty_n        => wrsp_valid,
        if_num_data_valid => open);
    
    -- User response queue: carries no payload (constant "0" in, output
    -- unused); only its occupancy is used to drive out_HLS_BVALID.
    user_resp : s2mm_gmem_m_axi_fifo
    generic map (
        DEPTH             => USER_MAXREQS,
        ADDR_WIDTH        => log2(USER_MAXREQS),
        DATA_WIDTH        => 1)
    port map (
        -- clocking
        clk               => ACLK,
        clk_en            => ACLK_EN,
        reset             => ARESET,
        -- push side: one entry per completed request
        if_write          => ursp_write,
        if_din            => "0",
        if_full_n         => ursp_ready,
        -- pop side: user acknowledges responses
        if_read           => in_HLS_BREADY,
        if_dout           => open,
        if_empty_n        => out_HLS_BVALID,
        if_num_data_valid => open);


    -- Accept a bus response only for valid-length requests and only when
    -- the user response queue has room.
    -- NOTE(review): this is not qualified by wrsp_valid, so it relies on the
    -- value fifo_wrsp presents while empty; confirm that is intended.
    out_AXI_BREADY <= wrsp_type(0) and ursp_ready;

    -- Tag each accepted request: "1" valid length, "0" invalid length.
    in_wrsp_type   <= "1" when valid_length = '1' else "0";
    -- Generate a user response either immediately (invalid-length request,
    -- no bus transaction) or when the bus response arrives.
    ursp_write     <= wrsp_valid and (not wrsp_type(0) or in_AXI_BVALID);
    -- Retire the tracking entry once the user response is actually queued.
    wrsp_read      <= ursp_ready and ursp_write;

end architecture behave;



-- 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689




library IEEE;
use IEEE.STD_LOGIC_1164.all;
use IEEE.NUMERIC_STD.all;

-- Interface of the write engine of the m_axi adapter.
-- The architecture in this file wires three sub-blocks behind these ports:
--   * a burst converter fed by the in_AXI_AW* request handshake,
--   * a throttle that drives the out_BUS_AW*/out_BUS_W* channels,
--   * a register slice plus per-port FIFOs that handle the B (response) channel.
entity s2mm_gmem_m_axi_write is
    generic (
        -- Forwarded unchanged to the throttle sub-block.
        CONSERVATIVE              : INTEGER := 0;
        -- Widths of the ID and user side-band vectors.
        C_M_AXI_ID_WIDTH          : INTEGER := 1;
        C_M_AXI_AWUSER_WIDTH      : INTEGER := 1;
        C_M_AXI_WUSER_WIDTH       : INTEGER := 1;
        C_M_AXI_BUSER_WIDTH       : INTEGER := 1;
        -- Constant values driven onto AWUSER/WUSER, AWPROT and AWCACHE respectively.
        C_USER_VALUE              : INTEGER := 0;
        C_PROT_VALUE              : INTEGER := 0;
        C_CACHE_VALUE             : INTEGER := 2#0011#;
        -- Address / data widths in bits; BUS_DATA_WIDTH/8 also sizes the strobes.
        BUS_ADDR_WIDTH            : INTEGER := 32;
        BUS_DATA_WIDTH            : INTEGER := 32;
        -- Width of the request length field (in_AXI_AWLEN).
        USER_LEN_WIDTH            : INTEGER := 32;
        -- Forwarded to the burst converter as MAX_BURST_LENGTH.
        MAX_WRITE_BURST_LENGTH    : INTEGER := 16;
        -- Depth of the response-tracking FIFOs: port 0 uses the ID0_ value,
        -- every other port uses NUM_WRITE_OUTSTANDING. Both are also passed
        -- to the throttle.
        NUM_WRITE_OUTSTANDING     : INTEGER := 2;
        ID0_NUM_WRITE_OUTSTANDING : INTEGER := 2;
        -- Number of per-port handshake bits and of response FIFOs.
        NUM_WRITE_PORTS           : INTEGER := 1);
    port (
        -- Clock, synchronous reset and clock enable.
        ACLK                      : in  STD_LOGIC;
        ARESET                    : in  STD_LOGIC;
        ACLK_EN                   : in  STD_LOGIC;
        -- Bus-side write address channel. AWSIZE, AWBURST, AWLOCK, AWCACHE,
        -- AWPROT, AWQOS, AWREGION and AWUSER are driven with constants.
        out_BUS_AWID              : out UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
        out_BUS_AWADDR            : out UNSIGNED(BUS_ADDR_WIDTH-1 downto 0);
        out_BUS_AWLEN             : out UNSIGNED(7 downto 0);
        out_BUS_AWSIZE            : out UNSIGNED(2 downto 0);
        out_BUS_AWBURST           : out UNSIGNED(1 downto 0);
        out_BUS_AWLOCK            : out UNSIGNED(1 downto 0);
        out_BUS_AWCACHE           : out UNSIGNED(3 downto 0);
        out_BUS_AWPROT            : out UNSIGNED(2 downto 0);
        out_BUS_AWQOS             : out UNSIGNED(3 downto 0);
        out_BUS_AWREGION          : out UNSIGNED(3 downto 0);
        out_BUS_AWUSER            : out UNSIGNED(C_M_AXI_AWUSER_WIDTH-1 downto 0);
        out_BUS_AWVALID           : out STD_LOGIC;
        in_BUS_AWREADY            : in  STD_LOGIC;
        -- Bus-side write data channel (driven by the throttle).
        out_BUS_WID               : out UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
        out_BUS_WDATA             : out UNSIGNED(BUS_DATA_WIDTH-1 downto 0);
        out_BUS_WSTRB             : out UNSIGNED(BUS_DATA_WIDTH/8-1 downto 0);
        out_BUS_WLAST             : out STD_LOGIC;
        out_BUS_WUSER             : out UNSIGNED(C_M_AXI_WUSER_WIDTH-1 downto 0);
        out_BUS_WVALID            : out STD_LOGIC;
        in_BUS_WREADY             : in  STD_LOGIC;
        -- Bus-side write response channel. in_BUS_BRESP and in_BUS_BUSER are
        -- not referenced by the architecture in this file.
        in_BUS_BID                : in  UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
        in_BUS_BRESP              : in  UNSIGNED(1 downto 0);
        in_BUS_BUSER              : in  UNSIGNED(C_M_AXI_BUSER_WIDTH-1 downto 0);
        in_BUS_BVALID             : in  STD_LOGIC;
        out_BUS_BREADY            : out STD_LOGIC;
        -- Incoming write requests (ID, address, length); ready is per port.
        in_AXI_AWID               : in  UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
        in_AXI_AWADDR             : in  UNSIGNED(BUS_ADDR_WIDTH-1 downto 0);
        in_AXI_AWLEN              : in  UNSIGNED(USER_LEN_WIDTH-1 downto 0);
        in_AXI_AWVALID            : in  STD_LOGIC;
        out_AXI_AWREADY           : out UNSIGNED(NUM_WRITE_PORTS-1 downto 0);
        -- Incoming write data stream (connected to the throttle).
        out_AXI_WID               : out UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
        in_AXI_WDATA              : in  UNSIGNED(BUS_DATA_WIDTH-1 downto 0);
        in_AXI_WSTRB              : in  UNSIGNED(BUS_DATA_WIDTH/8-1 downto 0);
        in_AXI_WVALID             : in  STD_LOGIC;
        out_AXI_WREADY            : out STD_LOGIC;
        -- Per-port write response handshake towards the request source.
        out_AXI_BVALID            : out UNSIGNED(NUM_WRITE_PORTS-1 downto 0);
        in_AXI_BREADY             : in  UNSIGNED(NUM_WRITE_PORTS-1 downto 0);
        -- Burst information: length/valid of each generated burst is exported,
        -- and a burst ID/length handshake is fed into the throttle.
        out_BURST_AWLEN           : out UNSIGNED(7 downto 0);
        out_BURST_AWVALID         : out UNSIGNED(NUM_WRITE_PORTS-1 downto 0);
        in_BURST_WID              : in  UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
        in_BURST_WLEN             : in  UNSIGNED(7 downto 0);
        in_BURST_WVALID           : in  STD_LOGIC;
        out_BURST_WREADY          : out UNSIGNED(NUM_WRITE_PORTS-1 downto 0));
end entity s2mm_gmem_m_axi_write;

architecture behave of s2mm_gmem_m_axi_write is

    -- Ceiling of log2(x): the number of doublings of 1 needed to reach or
    -- exceed x. Returns 0 for any x <= 1.
    function log2 (x : INTEGER) return INTEGER is
        variable width : INTEGER := 0;  -- doublings performed so far
        variable reach : INTEGER := 1;  -- 2 ** width
    begin
        while reach < x loop
            reach := 2 * reach;
            width := width + 1;
        end loop;
        return width;
    end function log2;

    -- Build a NUM_WRITE_PORTS-wide vector that is all '0' except for the bit
    -- selected by idx, which takes the value of valid.
    function bit_set (idx : UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0); valid : STD_LOGIC) 
    return UNSIGNED is
        variable onehot : UNSIGNED(NUM_WRITE_PORTS-1 downto 0) := (others => '0');
        variable slot   : NATURAL;
    begin
        slot := TO_INTEGER(idx);
        onehot(slot) := valid;
        return onehot;
    end function bit_set;

    -- Number of outstanding writes configured for port idx:
    -- port 0 uses ID0_NUM_WRITE_OUTSTANDING, all others NUM_WRITE_OUTSTANDING.
    function num_outstanding_val (idx : INTEGER)
    return INTEGER is
    begin
        if idx = 0 then
            return ID0_NUM_WRITE_OUTSTANDING;
        end if;
        return NUM_WRITE_OUTSTANDING;
    end function num_outstanding_val;

    -- Translate a bus-side AXI ID into the ID used locally by the write module.
    -- In this configuration every AXI ID translates to local ID 0.
    function compress_axi_id (axi_id : UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0))
    return UNSIGNED is
        variable mapped : INTEGER;
    begin
        if TO_INTEGER(axi_id) = 0 then
            mapped := 0;
        else
            mapped := 0;
        end if;
        return TO_UNSIGNED(mapped, C_M_AXI_ID_WIDTH);
    end function compress_axi_id;

    -- Translate an ID used locally by the write module back into the AXI ID
    -- placed on the bus. In this configuration every local ID yields AXI ID 0.
    function decompress_axi_id (local_id : UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0))
    return UNSIGNED is
        variable bus_id : INTEGER;
    begin
        if TO_INTEGER(local_id) = 0 then
            bus_id := 0;
        else
            bus_id := 0;
        end if;
        return TO_UNSIGNED(bus_id, C_M_AXI_ID_WIDTH);
    end function decompress_axi_id;

    --common
    -- Bytes per data beat and its log2; the latter is the value driven on AWSIZE.
    constant BUS_DATA_BYTES       : INTEGER := BUS_DATA_WIDTH / 8;
    constant BUS_ADDR_ALIGN       : INTEGER := log2(BUS_DATA_BYTES);

    -- IDs in the module-local (compressed) numbering: AW/W IDs come out of the
    -- throttle, the B ID is the compressed form of in_BUS_BID.
    signal  local_BUS_AWID        : UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
    signal  local_BUS_BID         : UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
    signal  local_BUS_WID         : UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);

    -- Burst handshake from the burst converter to the throttle
    -- (AWREADY/WREADY are driven back by the throttle).
    signal  local_BURST_AWID      : UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
    signal  local_BURST_AWADDR    : UNSIGNED(BUS_ADDR_WIDTH-1 downto 0);
    signal  local_BURST_AWLEN     : UNSIGNED(7 downto 0);
    signal  local_BURST_AWVALID   : STD_LOGIC;
    signal  local_BURST_AWREADY   : STD_LOGIC;
    signal  local_BURST_WREADY    : STD_LOGIC;

    -- Control stream from the burst converter into the per-port response FIFOs:
    -- ost_ctrl_write is the one-hot write strobe derived from id/valid,
    -- ost_ctrl_ready collects the FIFOs' not-full flags.
    -- ost_ctrl_pack is declared but not referenced in this architecture.
    signal  ost_ctrl_id           : UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
    signal  ost_ctrl_info         : UNSIGNED(0 downto 0);
    signal  ost_ctrl_valid        : STD_LOGIC;
    signal  ost_ctrl_pack         : UNSIGNED(C_M_AXI_ID_WIDTH+7 downto 0);
    signal  ost_ctrl_write        : UNSIGNED(NUM_WRITE_PORTS-1 downto 0);
    signal  ost_ctrl_ready        : UNSIGNED(NUM_WRITE_PORTS-1 downto 0);

    -- Registered bus response (output side of the B-channel register slice);
    -- next_resp pulses when a response is consumed.
    signal  resp_id               : UNSIGNED(C_M_AXI_ID_WIDTH-1 downto 0);
    signal  resp_valid            : STD_LOGIC;
    signal  resp_ready            : STD_LOGIC;
    signal  next_resp             : STD_LOGIC;

    -- Read side of the per-port response FIFOs: not-empty flag, stored info
    -- bit and read strobe, one bit per port.
    signal  ost_resp_valid        : UNSIGNED(NUM_WRITE_PORTS-1 downto 0);
    signal  ost_resp_info         : UNSIGNED(NUM_WRITE_PORTS-1 downto 0);
    signal  ost_resp_read         : UNSIGNED(NUM_WRITE_PORTS-1 downto 0);

    -- regslice io ?  no 
    
    -- component
    -- FIFO component used below for the per-port response bookkeeping.
    -- Write side: if_full_n / if_write / if_din.
    -- Read side:  if_empty_n / if_read / if_dout.
    -- if_num_data_valid is ADDR_WIDTH+1 bits wide and is left open by this
    -- architecture. The matching entity is not part of this excerpt.
    component s2mm_gmem_m_axi_fifo is
        generic (
            MEM_STYLE         : STRING  := "shiftreg";
            DATA_WIDTH        : INTEGER := 8;
            ADDR_WIDTH        : INTEGER := 4;
            DEPTH             : INTEGER := 16);
        port (
            clk               : in  STD_LOGIC;
            reset             : in  STD_LOGIC;
            clk_en            : in  STD_LOGIC;
            if_full_n         : out STD_LOGIC;
            if_write          : in  STD_LOGIC;
            if_din            : in  UNSIGNED(DATA_WIDTH-1 downto 0);
            if_empty_n        : out STD_LOGIC;
            if_read           : in  STD_LOGIC;
            if_dout           : out UNSIGNED(DATA_WIDTH-1 downto 0);
            if_num_data_valid : out UNSIGNED(ADDR_WIDTH downto 0));
    end component s2mm_gmem_m_axi_fifo;

    -- Register-slice component with a valid/ready handshake on both sides
    -- (s_* is the input side, m_* the output side). Note that it has no
    -- clk_en port. The matching entity is not part of this excerpt.
    component s2mm_gmem_m_axi_reg_slice is
        generic (
            DATA_WIDTH  : INTEGER := 8);
        port (
            clk         : in  STD_LOGIC;
            reset       : in  STD_LOGIC;
            s_data      : in  UNSIGNED(DATA_WIDTH-1 downto 0);
            s_valid     : in  STD_LOGIC;
            s_ready     : out STD_LOGIC;
            m_data      : out UNSIGNED(DATA_WIDTH-1 downto 0);
            m_valid     : out STD_LOGIC;
            m_ready     : in  STD_LOGIC);
    end component s2mm_gmem_m_axi_reg_slice;

    -- Burst-converter component: accepts requests (in_REQ_*), emits bursts
    -- (out_BURST_*) and a control stream (out_CTRL_*) that this architecture
    -- stores in the per-port response FIFOs. INTERLEAVE is not overridden by
    -- the instantiation below, so its default of 1 applies.
    component s2mm_gmem_m_axi_burst_converter is
        generic (
            INTERLEAVE        : INTEGER := 1;
            ID_WIDTH          : INTEGER := 1;
            DATA_WIDTH        : INTEGER := 32;
            ADDR_WIDTH        : INTEGER := 32;
            LEN_WIDTH         : INTEGER := 32;
            MAX_BURST_LENGTH  : INTEGER := 16;
            NUM_PORTS         : INTEGER := 1);
        port (
            clk               : in  STD_LOGIC;
            reset             : in  STD_LOGIC;
            clk_en            : in  STD_LOGIC;
            in_REQ_ID         : in  UNSIGNED(ID_WIDTH-1 downto 0);
            in_REQ_ADDR       : in  UNSIGNED(ADDR_WIDTH-1 downto 0);
            in_REQ_LEN        : in  UNSIGNED(LEN_WIDTH-1 downto 0);
            in_REQ_VALID      : in  STD_LOGIC;
            out_REQ_READY     : out UNSIGNED(NUM_PORTS-1 downto 0); 
            out_BURST_ID      : out UNSIGNED(ID_WIDTH-1 downto 0);
            out_BURST_ADDR    : out UNSIGNED(ADDR_WIDTH-1 downto 0);
            out_BURST_LEN     : out UNSIGNED(7 downto 0);
            out_BURST_VALID   : out STD_LOGIC;
            in_BURST_READY    : in  STD_LOGIC;
            out_CTRL_ID       : out UNSIGNED(ID_WIDTH-1 downto 0);
            out_CTRL_INFO     : out UNSIGNED(0 downto 0);
            out_CTRL_VALID    : out STD_LOGIC;
            in_CTRL_READY     : in  UNSIGNED(NUM_PORTS-1 downto 0));
    end component s2mm_gmem_m_axi_burst_converter;

    -- Write-throttle component. Its vector ports are declared as unconstrained
    -- UNSIGNED, so each one takes its width from the signal connected to it in
    -- the instantiation. It receives bursts (in_BURST_AW*), a burst ID/length
    -- handshake (in_BURST_W*) and the write data stream (in_AXI_W*), and
    -- drives the bus-side AW and W channels (out_BUS_*).
    -- The matching entity is not part of this excerpt.
    component s2mm_gmem_m_axi_throttle is
        generic (
            CONSERVATIVE      : INTEGER := 0;
            ID_WIDTH          : INTEGER := 1;
            ADDR_WIDTH        : INTEGER := 32;
            DATA_WIDTH        : INTEGER := 32;
            NUM_OUTSTANDING   : INTEGER := 16;
            ID0_NUM_OUTSTANDING : INTEGER := 2;
            NUM_PORTS         : INTEGER := 1);
        port (
            clk               : in  STD_LOGIC;
            reset             : in  STD_LOGIC;
            clk_en            : in  STD_LOGIC;
            in_BURST_AWID     : in  UNSIGNED;
            in_BURST_AWADDR   : in  UNSIGNED;
            in_BURST_AWLEN    : in  UNSIGNED;
            in_BURST_AWVALID  : in  STD_LOGIC;
            out_BURST_AWREADY : out STD_LOGIC;
            in_BURST_WID      : in  UNSIGNED;
            in_BURST_WLEN     : in  UNSIGNED;
            in_BURST_WVALID   : in  STD_LOGIC;
            out_BURST_WREADY  : out STD_LOGIC;
            out_AXI_WID       : out UNSIGNED;
            in_AXI_WDATA      : in  UNSIGNED;
            in_AXI_WSTRB      : in  UNSIGNED;
            in_AXI_WVALID     : in  STD_LOGIC;
            out_AXI_WREADY    : out STD_LOGIC;
            out_BUS_AWID      : out UNSIGNED;
            out_BUS_AWADDR    : out UNSIGNED;
            out_BUS_AWLEN     : out UNSIGNED;
            out_BUS_AWVALID   : out STD_LOGIC;
            in_BUS_AWREADY    : in  STD_LOGIC;
            out_BUS_WID       : out UNSIGNED;
            out_BUS_WDATA     : out UNSIGNED;
            out_BUS_WSTRB     : out UNSIGNED;
            out_BUS_WLAST     : out STD_LOGIC;
            out_BUS_WVALID    : out STD_LOGIC;
            in_BUS_WREADY     : in  STD_LOGIC);
    end component s2mm_gmem_m_axi_throttle;

begin
    --------------------------- BUS global config ----------------------------------
    -- Attributes that are constant for every transfer issued by this module.
    -- IDs leaving the module are translated from the local numbering back to
    -- AXI IDs. AWSIZE is log2 of the number of bytes per data beat, and
    -- AWBURST "01" is the incrementing-burst (INCR) encoding of AXI.
    out_BUS_AWID      <= decompress_axi_id(local_BUS_AWID);
    out_BUS_AWSIZE    <= TO_UNSIGNED(BUS_ADDR_ALIGN, out_BUS_AWSIZE'length);
    out_BUS_AWBURST   <= "01";
    out_BUS_AWLOCK    <= "00";
    out_BUS_AWCACHE   <= TO_UNSIGNED(C_CACHE_VALUE, out_BUS_AWCACHE'length);
    out_BUS_AWPROT    <= TO_UNSIGNED(C_PROT_VALUE, out_BUS_AWPROT'length);
    out_BUS_AWUSER    <= TO_UNSIGNED(C_USER_VALUE, out_BUS_AWUSER'length);
    out_BUS_AWQOS     <= "0000";
    out_BUS_AWREGION  <= "0000";
    out_BUS_WID       <= decompress_axi_id(local_BUS_WID);
    out_BUS_WUSER     <= TO_UNSIGNED(C_USER_VALUE, out_BUS_WUSER'length);
    --------------------------- AW channel begin -----------------------------------
    -- Instantiation
    -- The burst converter takes the incoming requests and produces the burst
    -- stream (local_BURST_AW*) consumed by the throttle, plus the control
    -- stream (ost_ctrl_*) recorded in the response FIFOs further below.
    wreq_burst_conv : s2mm_gmem_m_axi_burst_converter
    generic map (
        ID_WIDTH         => C_M_AXI_ID_WIDTH,
        DATA_WIDTH       => BUS_DATA_WIDTH,
        ADDR_WIDTH       => BUS_ADDR_WIDTH,
        LEN_WIDTH        => USER_LEN_WIDTH,
        MAX_BURST_LENGTH => MAX_WRITE_BURST_LENGTH,
        NUM_PORTS        => NUM_WRITE_PORTS)
    port map (
        clk              => ACLK,
        reset            => ARESET,
        clk_en           => ACLK_EN,
        in_REQ_ID        => in_AXI_AWID,
        in_REQ_ADDR      => in_AXI_AWADDR,
        in_REQ_LEN       => in_AXI_AWLEN,
        in_REQ_VALID     => in_AXI_AWVALID,
        out_REQ_READY    => out_AXI_AWREADY,
        out_BURST_ID     => local_BURST_AWID,
        out_BURST_ADDR   => local_BURST_AWADDR,
        out_BURST_LEN    => local_BURST_AWLEN,
        out_BURST_VALID  => local_BURST_AWVALID,
        in_BURST_READY   => local_BURST_AWREADY,
        out_CTRL_ID      => ost_ctrl_id,
        out_CTRL_INFO    => ost_ctrl_info,
        out_CTRL_VALID   => ost_ctrl_valid,
        in_CTRL_READY    => ost_ctrl_ready);

    -- Export the burst length as-is, the burst valid as a one-hot vector
    -- indexed by the burst ID, and the throttle's W-ready replicated on
    -- every port bit.
    out_BURST_AWLEN      <= local_BURST_AWLEN;
    out_BURST_AWVALID    <= bit_set(local_BURST_AWID, local_BURST_AWVALID);
    out_BURST_WREADY     <= (others => local_BURST_WREADY);
    --------------------------- AW channel end -------------------------------------

    --------------------------- W channel begin ------------------------------------
    -- Write throttling instantiation
    -- The throttle sits between the burst stream / incoming write data and
    -- the bus-side AW and W channels. Its AWID/WID outputs are in the local
    -- ID numbering and are translated in the global-config section above.
    wreq_throttl : s2mm_gmem_m_axi_throttle
    generic map (
        CONSERVATIVE      => CONSERVATIVE,
        ID_WIDTH          => C_M_AXI_ID_WIDTH,
        ADDR_WIDTH        => BUS_ADDR_WIDTH,
        DATA_WIDTH        => BUS_DATA_WIDTH,
        NUM_OUTSTANDING   => NUM_WRITE_OUTSTANDING,
        ID0_NUM_OUTSTANDING => ID0_NUM_WRITE_OUTSTANDING,
        NUM_PORTS         => NUM_WRITE_PORTS)
    port map (
        clk               => ACLK,
        reset             => ARESET,
        clk_en            => ACLK_EN,
        -- internal
        in_BURST_AWID     => local_BURST_AWID,
        in_BURST_AWADDR   => local_BURST_AWADDR,
        in_BURST_AWLEN    => local_BURST_AWLEN,
        in_BURST_AWVALID  => local_BURST_AWVALID,
        out_BURST_AWREADY => local_BURST_AWREADY,
        in_BURST_WID      => in_BURST_WID,
        in_BURST_WLEN     => in_BURST_WLEN,
        in_BURST_WVALID   => in_BURST_WVALID,
        out_BURST_WREADY  => local_BURST_WREADY,
        out_AXI_WID       => out_AXI_WID,
        in_AXI_WDATA      => in_AXI_WDATA,
        in_AXI_WSTRB      => in_AXI_WSTRB,
        in_AXI_WVALID     => in_AXI_WVALID,
        out_AXI_WREADY    => out_AXI_WREADY,
        -- AXI BUS 
        out_BUS_AWID      => local_BUS_AWID,
        out_BUS_AWADDR    => out_BUS_AWADDR,
        out_BUS_AWLEN     => out_BUS_AWLEN,
        out_BUS_AWVALID   => out_BUS_AWVALID,
        in_BUS_AWREADY    => in_BUS_AWREADY,
        out_BUS_WID       => local_BUS_WID,
        out_BUS_WDATA     => out_BUS_WDATA,
        out_BUS_WSTRB     => out_BUS_WSTRB,
        out_BUS_WLAST     => out_BUS_WLAST,
        out_BUS_WVALID    => out_BUS_WVALID,
        in_BUS_WREADY     => in_BUS_WREADY
    );

    --------------------------- W channel end --------------------------------------
    --------------------------- B channel begin ------------------------------------
    -- Instantiation
    -- Register slice on the bus response: only the (compressed) BID travels
    -- through it; BRESP and BUSER are not connected. The slice has no
    -- clock-enable input.
    rs_resp : s2mm_gmem_m_axi_reg_slice
        generic map (
            DATA_WIDTH   => C_M_AXI_ID_WIDTH)
        port map (
            clk          => ACLK,
            reset        => ARESET,
            s_data       => local_BUS_BID,
            s_valid      => in_BUS_BVALID,
            s_ready      => out_BUS_BREADY,
            m_data       => resp_id,
            m_valid      => resp_valid,
            m_ready      => resp_ready);

    -- One 1-bit FIFO per port. An entry (the converter's info bit) is written
    -- whenever the converter emits a control word for that port's ID and is
    -- popped when a bus response for that ID is consumed. The depth is the
    -- port's configured number of outstanding writes.
    fifo_resp_gen : for idx in 0 to NUM_WRITE_PORTS-1 generate
        fifo_resp : s2mm_gmem_m_axi_fifo
        generic map (
            DATA_WIDTH   => 1,
            ADDR_WIDTH   => log2(num_outstanding_val(idx)),
            DEPTH        => num_outstanding_val(idx))
        port map (
            clk          => ACLK,
            reset        => ARESET,
            clk_en       => ACLK_EN,
            if_full_n    => ost_ctrl_ready(idx),
            if_write     => ost_ctrl_write(idx),
            if_din       => ost_ctrl_info,
            if_empty_n   => ost_resp_valid(idx),
            if_read      => ost_resp_read(idx),
            if_dout      => ost_resp_info(idx downto idx),
            if_num_data_valid => open);
    end generate fifo_resp_gen;

    -- One-hot write/read strobes selecting the FIFO that belongs to the ID.
    ost_ctrl_write       <= bit_set(ost_ctrl_id, ost_ctrl_valid);
    ost_resp_read        <= bit_set(resp_id, next_resp);
    -- A registered response can be consumed once its FIFO holds an entry and
    -- either the port accepts a response (in_AXI_BREADY) or the stored info
    -- bit is '0', in which case out_AXI_BVALID below stays low for it.
    -- NOTE(review): the info bit presumably marks the last burst of a
    -- request -- confirm in the burst converter.
    resp_ready           <= '1' when ost_resp_valid(TO_INTEGER(resp_id)) = '1' and (in_AXI_BREADY(TO_INTEGER(resp_id)) = '1' or ost_resp_info(TO_INTEGER(resp_id)) = '0') else '0';
    next_resp            <= resp_valid and resp_ready;

    -- Response valid is raised only on the port selected by resp_id and only
    -- when that port's stored info bit is '1'.
    out_AXI_BVALID       <= ost_resp_info and bit_set(resp_id, resp_valid);
    -- Bus BID translated into the local ID numbering before the register slice.
    local_BUS_BID        <= compress_axi_id(in_BUS_BID);
--------------------------- B channel end --------------------------------------
end architecture behave;


library IEEE;
use IEEE.STD_LOGIC_1164.all;
use IEEE.NUMERIC_STD.all;

-- Burst converter wrapper. Its architecture in this file instantiates exactly
-- one of two implementations with an identical port list:
--   * s2mm_gmem_m_axi_burst_interleave when INTERLEAVE = 1 and NUM_PORTS /= 1,
--   * s2mm_gmem_m_axi_burst_sequential otherwise.
-- All other generics and all ports are passed straight through.
entity s2mm_gmem_m_axi_burst_converter is
    generic (
        -- Selects the interleaving implementation (only when NUM_PORTS /= 1).
        INTERLEAVE        : INTEGER := 1;
        ID_WIDTH          : INTEGER := 1;
        DATA_WIDTH        : INTEGER := 32;
        ADDR_WIDTH        : INTEGER := 32;
        LEN_WIDTH         : INTEGER := 32;
        MAX_BURST_LENGTH  : INTEGER := 16;
        NUM_PORTS         : INTEGER := 1);
    port (
        clk               : in  STD_LOGIC;
        reset             : in  STD_LOGIC;
        clk_en            : in  STD_LOGIC;
        -- Incoming request (ID, address, length); ready is one bit per port.
        in_REQ_ID         : in  UNSIGNED(ID_WIDTH-1 downto 0);
        in_REQ_ADDR       : in  UNSIGNED(ADDR_WIDTH-1 downto 0);
        in_REQ_LEN        : in  UNSIGNED(LEN_WIDTH-1 downto 0);
        in_REQ_VALID      : in  STD_LOGIC;
        out_REQ_READY     : out UNSIGNED(NUM_PORTS-1 downto 0);
        -- Generated burst (ID, address, 8-bit length) with valid/ready handshake.
        out_BURST_ID      : out UNSIGNED(ID_WIDTH-1 downto 0);
        out_BURST_ADDR    : out UNSIGNED(ADDR_WIDTH-1 downto 0);
        out_BURST_LEN     : out UNSIGNED(7 downto 0);
        out_BURST_VALID   : out STD_LOGIC;
        in_BURST_READY    : in  STD_LOGIC;
        -- Control stream: ID plus a 1-bit info flag; ready is one bit per port.
        out_CTRL_ID       : out UNSIGNED(ID_WIDTH-1 downto 0);
        out_CTRL_INFO     : out UNSIGNED(0 downto 0);
        out_CTRL_VALID    : out STD_LOGIC;
        in_CTRL_READY     : in  UNSIGNED(NUM_PORTS-1 downto 0));

end entity s2mm_gmem_m_axi_burst_converter;

-- Purely structural architecture: picks the interleaving or the sequential
-- burst generator at elaboration time and wires it 1:1 to the entity ports.
-- The two generate conditions are exact complements, so exactly one of the
-- two instances exists for any generic setting.
architecture behave of s2mm_gmem_m_axi_burst_converter is

    -- component
    -- Interleaving implementation (same generics, minus INTERLEAVE, and the
    -- same ports as this entity).
    component s2mm_gmem_m_axi_burst_interleave is
        generic (
            ID_WIDTH          : INTEGER := 1;
            DATA_WIDTH        : INTEGER := 32;
            ADDR_WIDTH        : INTEGER := 32;
            LEN_WIDTH         : INTEGER := 32;
            MAX_BURST_LENGTH  : INTEGER := 16;
            NUM_PORTS         : INTEGER := 1);
        port (
            clk               : in  STD_LOGIC;
            reset             : in  STD_LOGIC;
            clk_en            : in  STD_LOGIC;
            in_REQ_ID         : in  UNSIGNED(ID_WIDTH-1 downto 0);
            in_REQ_ADDR       : in  UNSIGNED(ADDR_WIDTH-1 downto 0);
            in_REQ_LEN        : in  UNSIGNED(LEN_WIDTH-1 downto 0);
            in_REQ_VALID      : in  STD_LOGIC;
            out_REQ_READY     : out UNSIGNED(NUM_PORTS-1 downto 0); 
            out_BURST_ID      : out UNSIGNED(ID_WIDTH-1 downto 0);
            out_BURST_ADDR    : out UNSIGNED(ADDR_WIDTH-1 downto 0);
            out_BURST_LEN     : out UNSIGNED(7 downto 0);
            out_BURST_VALID   : out STD_LOGIC;
            in_BURST_READY    : in  STD_LOGIC;
            out_CTRL_ID       : out UNSIGNED(ID_WIDTH-1 downto 0);
            out_CTRL_INFO     : out UNSIGNED(0 downto 0);
            out_CTRL_VALID    : out STD_LOGIC;
            in_CTRL_READY     : in  UNSIGNED(NUM_PORTS-1 downto 0));
    end component s2mm_gmem_m_axi_burst_interleave;

    -- Sequential implementation (identical interface to the one above).
    component s2mm_gmem_m_axi_burst_sequential is
        generic (
            ID_WIDTH          : INTEGER := 1;
            DATA_WIDTH        : INTEGER := 32;
            ADDR_WIDTH        : INTEGER := 32;
            LEN_WIDTH         : INTEGER := 32;
            MAX_BURST_LENGTH  : INTEGER := 16;
            NUM_PORTS         : INTEGER := 1);
        port (
            clk               : in  STD_LOGIC;
            reset             : in  STD_LOGIC;
            clk_en            : in  STD_LOGIC;
            in_REQ_ID         : in  UNSIGNED(ID_WIDTH-1 downto 0);
            in_REQ_ADDR       : in  UNSIGNED(ADDR_WIDTH-1 downto 0);
            in_REQ_LEN        : in  UNSIGNED(LEN_WIDTH-1 downto 0);
            in_REQ_VALID      : in  STD_LOGIC;
            out_REQ_READY     : out UNSIGNED(NUM_PORTS-1 downto 0); 
            out_BURST_ID      : out UNSIGNED(ID_WIDTH-1 downto 0);
            out_BURST_ADDR    : out UNSIGNED(ADDR_WIDTH-1 downto 0);
            out_BURST_LEN     : out UNSIGNED(7 downto 0);
            out_BURST_VALID   : out STD_LOGIC;
            in_BURST_READY    : in  STD_LOGIC;
            out_CTRL_ID       : out UNSIGNED(ID_WIDTH-1 downto 0);
            out_CTRL_INFO     : out UNSIGNED(0 downto 0);
            out_CTRL_VALID    : out STD_LOGIC;
            in_CTRL_READY     : in  UNSIGNED(NUM_PORTS-1 downto 0));
    end component s2mm_gmem_m_axi_burst_sequential;

begin
    -- Instantiation 
    -- Interleaving variant: requested via INTERLEAVE = 1 and only used when
    -- there is more than one port.
    interleaved_bursts : if ((INTERLEAVE = 1) and (NUM_PORTS /= 1)) generate
    begin
        burst_interleave : s2mm_gmem_m_axi_burst_interleave
        generic map (
            ID_WIDTH         => ID_WIDTH,
            DATA_WIDTH       => DATA_WIDTH,
            ADDR_WIDTH       => ADDR_WIDTH,
            LEN_WIDTH        => LEN_WIDTH,
            MAX_BURST_LENGTH => MAX_BURST_LENGTH,
            NUM_PORTS        => NUM_PORTS)
        port map (
            clk              => clk,
            reset            => reset,
            clk_en           => clk_en,
            in_REQ_ID        => in_REQ_ID,
            in_REQ_ADDR      => in_REQ_ADDR,
            in_REQ_LEN       => in_REQ_LEN,
            in_REQ_VALID     => in_REQ_VALID,
            out_REQ_READY    => out_REQ_READY,
            out_BURST_ID     => out_BURST_ID,
            out_BURST_ADDR   => out_BURST_ADDR ,
            out_BURST_LEN    => out_BURST_LEN,
            out_BURST_VALID  => out_BURST_VALID,
            in_BURST_READY   => in_BURST_READY,
            out_CTRL_ID      => out_CTRL_ID,
            out_CTRL_INFO    => out_CTRL_INFO,
            out_CTRL_VALID   => out_CTRL_VALID,
            in_CTRL_READY    => in_CTRL_READY);
    end generate interleaved_bursts;

    -- Sequential variant: used for a single port or whenever interleaving is
    -- not requested (logical complement of the condition above).
    sequential_bursts : if ((INTERLEAVE /= 1) or (NUM_PORTS = 1)) generate
    begin
        burst_sequential : s2mm_gmem_m_axi_burst_sequential
        generic map (
            ID_WIDTH         => ID_WIDTH,
            DATA_WIDTH       => DATA_WIDTH,
            ADDR_WIDTH       => ADDR_WIDTH,
            LEN_WIDTH        => LEN_WIDTH,
            MAX_BURST_LENGTH => MAX_BURST_LENGTH,
            NUM_PORTS        => NUM_PORTS)
        port map (
            clk              => clk,
            reset            => reset,
            clk_en           => clk_en,
            in_REQ_ID        => in_REQ_ID,
            in_REQ_ADDR      => in_REQ_ADDR,
            in_REQ_LEN       => in_REQ_LEN,
            in_REQ_VALID     => in_REQ_VALID,
            out_REQ_READY    => out_REQ_READY,
            out_BURST_ID     => out_BURST_ID,
            out_BURST_ADDR   => out_BURST_ADDR ,
            out_BURST_LEN    => out_BURST_LEN,
            out_BURST_VALID  => out_BURST_VALID,
            in_BURST_READY   => in_BURST_READY,
            out_CTRL_ID      => out_CTRL_ID,
            out_CTRL_INFO    => out_CTRL_INFO,
            out_CTRL_VALID   => out_CTRL_VALID,
            in_CTRL_READY    => in_CTRL_READY);
    end generate sequential_bursts;
end architecture behave;


library IEEE;
use IEEE.STD_LOGIC_1164.all;
use IEEE.NUMERIC_STD.all;

-- Interleaving burst generator; port-compatible with the burst converter
-- wrapper that instantiates it. The visible part of its architecture buffers
-- each accepted request as one packed word (ID & LEN & ADDR) and keeps a
-- per-port ready flag that is cleared when a request for that ID is accepted
-- and set again when a control word with info = "1" is issued for that ID.
entity s2mm_gmem_m_axi_burst_interleave is
    generic (
        ID_WIDTH          : INTEGER := 1;
        -- Data bus width in bits; DATA_WIDTH/8 is the number of bytes per beat.
        DATA_WIDTH        : INTEGER := 32;
        ADDR_WIDTH        : INTEGER := 32;
        LEN_WIDTH         : INTEGER := 32;
        MAX_BURST_LENGTH  : INTEGER := 16;
        -- Number of per-port ready bits; also selects the request buffer type
        -- (FIFO when greater than 2, register slice otherwise).
        NUM_PORTS         : INTEGER := 1);
    port (
        clk               : in  STD_LOGIC;
        reset             : in  STD_LOGIC;
        clk_en            : in  STD_LOGIC;
        -- Incoming request (ID, address, length); ready is one bit per port.
        in_REQ_ID         : in  UNSIGNED(ID_WIDTH-1 downto 0);
        in_REQ_ADDR       : in  UNSIGNED(ADDR_WIDTH-1 downto 0);
        in_REQ_LEN        : in  UNSIGNED(LEN_WIDTH-1 downto 0);
        in_REQ_VALID      : in  STD_LOGIC;
        out_REQ_READY     : out UNSIGNED(NUM_PORTS-1 downto 0);
        -- Generated burst (ID, address, 8-bit length) with valid/ready handshake.
        out_BURST_ID      : out UNSIGNED(ID_WIDTH-1 downto 0);
        out_BURST_ADDR    : out UNSIGNED(ADDR_WIDTH-1 downto 0);
        out_BURST_LEN     : out UNSIGNED(7 downto 0);
        out_BURST_VALID   : out STD_LOGIC;
        in_BURST_READY    : in  STD_LOGIC;
        -- Control stream: ID plus a 1-bit info flag; ready is one bit per port.
        out_CTRL_ID       : out UNSIGNED(ID_WIDTH-1 downto 0);
        out_CTRL_INFO     : out UNSIGNED(0 downto 0);
        out_CTRL_VALID    : out STD_LOGIC;
        in_CTRL_READY     : in  UNSIGNED(NUM_PORTS-1 downto 0));

end entity s2mm_gmem_m_axi_burst_interleave;

architecture behave of s2mm_gmem_m_axi_burst_interleave is

    -- OR together every bit of a NUM_PORTS-wide vector.
    function or_reduce(a : UNSIGNED(NUM_PORTS-1 downto 0)) return STD_LOGIC is
        variable any_set : STD_LOGIC;
    begin
        any_set := '0';
        -- "or" on STD_LOGIC is commutative and associative, so the order in
        -- which the bits are folded does not affect the result.
        for bit_pos in a'reverse_range loop
            any_set := a(bit_pos) or any_set;
        end loop;

        return any_set;
    end function or_reduce;

    -- Ceiling of log2(x): the number of doublings of 1 needed to reach or
    -- exceed x. Returns 0 for any x <= 1.
    function log2 (x : INTEGER) return INTEGER is
        variable width : INTEGER := 0;  -- doublings performed so far
        variable reach : INTEGER := 1;  -- 2 ** width
    begin
        while reach < x loop
            reach := 2 * reach;
            width := width + 1;
        end loop;
        return width;
    end function log2;

    --common
    constant PACK_WIDTH           : INTEGER := ID_WIDTH + ADDR_WIDTH + LEN_WIDTH;
    constant DATA_BYTES           : INTEGER := DATA_WIDTH / 8;
    constant ADDR_ALIGN           : INTEGER := log2(DATA_BYTES);
    constant BOUNDARY_BEATS       : UNSIGNED(11-ADDR_ALIGN downto 0) := (others => '1');
    constant NUM_BEAT_WIDTH       : INTEGER := log2(MAX_BURST_LENGTH);

    --local signals
    signal  req_pack_in           : UNSIGNED(PACK_WIDTH-1 downto 0);
    signal  req_pack_out          : UNSIGNED(PACK_WIDTH-1 downto 0);
    signal  req_id_tmp            : UNSIGNED(ID_WIDTH-1 downto 0);
    signal  req_addr_tmp          : UNSIGNED(ADDR_WIDTH-1 downto 0);
    signal  req_len_tmp           : UNSIGNED(LEN_WIDTH-1 downto 0);
    signal  req_ready             : UNSIGNED(NUM_PORTS-1 downto 0);
    
    signal  req_full_n            : STD_LOGIC;
    signal  req_empty_n           : STD_LOGIC;
    signal  write_req             : STD_LOGIC;
    signal  read_req              : STD_LOGIC;
    signal  next_req              : STD_LOGIC;

    signal  start_addr            : UNSIGNED(ADDR_WIDTH-1 downto 0);
    signal  sect_addr             : UNSIGNED(ADDR_WIDTH-1 downto 0);
    signal  sect_addr_buf         : UNSIGNED(ADDR_WIDTH-1 downto 0);
    signal  req_id                : UNSIGNED(ID_WIDTH-1 downto 0);
    signal  req_id_buf            : UNSIGNED(ID_WIDTH-1 downto 0);

    signal  beat_len              : UNSIGNED(LEN_WIDTH-1 downto 0);
    signal  beat_len_buf          : UNSIGNED(LEN_WIDTH-1 downto 0);
    signal  start_to_4k           : UNSIGNED(11-ADDR_ALIGN downto 0);
    signal  end_from_4k           : UNSIGNED(11-ADDR_ALIGN downto 0);
    signal  sect_len              : UNSIGNED(11-ADDR_ALIGN downto 0);
    signal  sect_len_buf          : UNSIGNED(11-ADDR_ALIGN downto 0);
    signal  sect_cnt              : UNSIGNED(ADDR_WIDTH-13 downto 0);
    signal  sect_total            : UNSIGNED(LEN_WIDTH-13 downto 0);
    signal  sect_total_buf        : UNSIGNED(LEN_WIDTH-13 downto 0);
    signal  sect_total_tmp        : UNSIGNED(LEN_WIDTH-13 downto 0);

    signal  req_handling          : BOOLEAN;
    signal  single_sect           : BOOLEAN;
    signal  first_sect            : BOOLEAN;
    signal  last_sect             : BOOLEAN;
    signal  last_sect_buf         : BOOLEAN;
    signal  last_sect_tmp         : BOOLEAN;
    signal  ready_for_sect        : BOOLEAN;
    signal  next_sect             : BOOLEAN;

    signal  burst_valid           : STD_LOGIC;

    signal  ost_ctrl_id           : UNSIGNED(ID_WIDTH-1 downto 0);
    signal  ost_ctrl_info         : UNSIGNED(0 downto 0);
    signal  ost_ctrl_valid        : STD_LOGIC;
    signal  ost_ctrl_ready        : STD_LOGIC;

    signal  rem_req_pack          : UNSIGNED(PACK_WIDTH-1 downto 0);
    signal  rem_req_valid         : STD_LOGIC;
    signal  rem_req_id            : UNSIGNED(ID_WIDTH-1 downto 0);
    signal  rem_req_addr          : UNSIGNED(ADDR_WIDTH-1 downto 0);
    signal  rem_req_len           : UNSIGNED(LEN_WIDTH-1 downto 0);
    signal  rem_req_addr_pred     : UNSIGNED(ADDR_WIDTH-1 downto 0);
    signal  rem_req_len_pred      : UNSIGNED(LEN_WIDTH-1 downto 0);

    component s2mm_gmem_m_axi_fifo is
        generic (
            MEM_STYLE         : STRING  := "shiftreg";
            DATA_WIDTH        : INTEGER := 8;
            ADDR_WIDTH        : INTEGER := 4;
            DEPTH             : INTEGER := 16);
        port (
            clk               : in  STD_LOGIC;
            reset             : in  STD_LOGIC;
            clk_en            : in  STD_LOGIC;
            if_full_n         : out STD_LOGIC;
            if_write          : in  STD_LOGIC;
            if_din            : in  UNSIGNED(DATA_WIDTH-1 downto 0);
            if_empty_n        : out STD_LOGIC;
            if_read           : in  STD_LOGIC;
            if_dout           : out UNSIGNED(DATA_WIDTH-1 downto 0);
            if_num_data_valid : out UNSIGNED(ADDR_WIDTH downto 0));
    end component s2mm_gmem_m_axi_fifo;

    component s2mm_gmem_m_axi_reg_slice is
        generic (
            DATA_WIDTH  : INTEGER := 8);
        port (
            clk         : in  STD_LOGIC;
            reset       : in  STD_LOGIC;
            s_data      : in  UNSIGNED(DATA_WIDTH-1 downto 0);
            s_valid     : in  STD_LOGIC;
            s_ready     : out STD_LOGIC;
            m_data      : out UNSIGNED(DATA_WIDTH-1 downto 0);
            m_valid     : out STD_LOGIC;
            m_ready     : in  STD_LOGIC);
    end component s2mm_gmem_m_axi_reg_slice;

begin
    --------------------------- AR channel begin -----------------------------------
    -- Instantiation
    -- More than two ports: requests are queued in a FIFO with one entry per
    -- port.  MEM_STYLE is left at the component default.
    num_ports_gt2 : if NUM_PORTS > 2 generate
        req_buffer : s2mm_gmem_m_axi_fifo
            generic map (
                DATA_WIDTH => PACK_WIDTH,
                ADDR_WIDTH => log2(NUM_PORTS),
                DEPTH      => NUM_PORTS)
            port map (
                clk               => clk,
                reset             => reset,
                clk_en            => clk_en,
                -- push side: packed request in
                if_write          => write_req,
                if_din            => req_pack_in,
                if_full_n         => req_full_n,
                -- pop side: packed request out
                if_read           => read_req,
                if_dout           => req_pack_out,
                if_empty_n        => req_empty_n,
                -- fill level is not used by this architecture
                if_num_data_valid => open);
    end generate num_ports_gt2;
    
    -- One or two ports: a register slice replaces the FIFO as request buffer.
    -- It drives the same req_full_n / req_empty_n / req_pack_out signals as the
    -- FIFO variant, so the rest of the architecture is unaffected.
    num_ports_ngt2 : if NUM_PORTS <= 2 generate
        rs_req : s2mm_gmem_m_axi_reg_slice
            generic map (
                DATA_WIDTH => PACK_WIDTH)
            port map (
                clk     => clk,
                reset   => reset,
                -- input side: packed request in
                s_valid => write_req,
                s_data  => req_pack_in,
                s_ready => req_full_n,
                -- output side: packed request out
                m_valid => req_empty_n,
                m_data  => req_pack_out,
                m_ready => read_req);
    end generate num_ports_ngt2;

    -- Upstream ready: the per-port req_ready bits are exposed only while the
    -- request buffer can accept data and no remainder request is pending;
    -- otherwise every port sees not-ready.
    out_REQ_READY      <= req_ready    when req_full_n = '1' and rem_req_valid = '0'   else (others=>'0');
    -- Buffer input: a pending remainder request takes precedence over a new
    -- incoming request.  Packing order is ID (MSBs), LEN, ADDR (LSBs).
    req_pack_in        <= rem_req_pack when rem_req_valid = '1' else (in_REQ_ID & in_REQ_LEN & in_REQ_ADDR);
    -- Write into the buffer for either source.
    write_req          <= rem_req_valid or in_REQ_VALID;

    -- Per-port request-ready flags.
    --   * reset: every port is ready.
    --   * a new request is accepted (in_REQ_VALID with buffer space and no
    --     pending remainder): the bit selected by in_REQ_ID is cleared.
    --   * a control beat with ost_ctrl_info = "1" is issued: the bit selected
    --     by ost_ctrl_id is set again.
    -- Both updates can occur in the same cycle; if they address the same bit
    -- the set wins because it is the later signal assignment in the process.
    process (clk)
    begin
        if (clk'event and clk = '1') then
            if (reset = '1') then
                req_ready <= (others=>'1');
            elsif clk_en = '1' then
                if in_REQ_VALID = '1' and req_full_n = '1' and rem_req_valid = '0' then
                    req_ready(TO_INTEGER(in_REQ_ID)) <= '0';
                end if;
                if ost_ctrl_info = "1" and ost_ctrl_valid = '1' then
                    req_ready(TO_INTEGER(ost_ctrl_id)) <= '1';
                end if;
            end if;
        end if;
    end process;    

    -- Unpack the buffered request; field order mirrors req_pack_in
    -- (ADDR in the low bits, then LEN, then ID in the top bits).
    req_addr_tmp       <= req_pack_out(ADDR_WIDTH-1  downto 0);
    req_len_tmp        <= req_pack_out(ADDR_WIDTH+LEN_WIDTH-1 downto ADDR_WIDTH);
    req_id_tmp         <= req_pack_out(PACK_WIDTH-1  downto ADDR_WIDTH+LEN_WIDTH);
    -- A request is actually taken from the buffer this cycle.
    next_req           <= read_req and req_empty_n;

    -- Latch the parameters of the request popped from the buffer (next_req):
    --   req_id      : request ID
    --   start_addr  : request address with the low ADDR_ALIGN bits cleared
    --   start_to_4k : BOUNDARY_BEATS minus the beat index of the start address
    --                 within its 4 KB page
    --   end_from_4k : beat index, within its page, of (length + page offset)
    --   sect_total  : (length + page offset) / 4096
    process (clk)
        variable page_off : UNSIGNED(11 downto 0);  -- byte offset inside the 4 KB page
        variable len_lo   : UNSIGNED(11 downto 0);  -- low 12 bits of the length field
    begin
        if clk'event and clk = '1' then
            if reset = '1' then
                sect_total  <= (others => '0');
                end_from_4k <= (others => '0');
                start_to_4k <= (others => '0');
                start_addr  <= (others => '0');
                req_id      <= (others => '0');
            elsif clk_en = '1' and next_req = '1' then
                page_off := req_addr_tmp(11 downto 0);
                len_lo   := req_len_tmp(11 downto 0);

                req_id      <= req_id_tmp;
                start_addr  <= req_addr_tmp(ADDR_WIDTH-1 downto ADDR_ALIGN) & (ADDR_ALIGN-1 downto 0 => '0');
                start_to_4k <= BOUNDARY_BEATS - page_off(11 downto ADDR_ALIGN);
                end_from_4k <= RESIZE(SHIFT_RIGHT(len_lo + page_off, ADDR_ALIGN), 12-ADDR_ALIGN);
                sect_total  <= RESIZE(SHIFT_RIGHT(req_len_tmp + page_off, 12), LEN_WIDTH-12);
            end if;
        end if;
    end process;

    -- req_handling flag: true while a request is being split into sections.
    -- Set when a request is popped (next_req).  Cleared when the last section
    -- of the current request is issued and the buffer holds no further request.
    -- The set condition has priority over the clear.
    process (clk)
    begin
        if (clk'event and clk = '1') then
            if (reset = '1') then
                req_handling <= false;
            elsif clk_en = '1' then
                if next_req = '1' then
                    req_handling <= true;
                elsif req_empty_n = '0' and last_sect_tmp and next_sect then
                    req_handling <= false;
                end if;
            end if;
        end if;
    end process;

    -- 4k boundary section
    -- The current section is the final one of the request: either the request
    -- fits in a single section or the registered last_sect flag is set.
    last_sect_tmp  <= single_sect or last_sect;

    -- Remaining-section count: the freshly latched total for the first
    -- section, the decremented copy afterwards.
    sect_total_tmp <= sect_total when first_sect else sect_total_buf;

    -- The request does not cross a 4 KB boundary.
    single_sect <= (sect_total = 0);

    -- Section start address: the aligned request address for the first
    -- section, otherwise the start of the 4 KB page numbered sect_cnt.
    sect_addr  <= start_addr when first_sect else
                  sect_cnt & (11 downto 0 => '0');
    -- Section length, selected by the position of the section in the request:
    --   only section   -> low bits of beat_len
    --   first of many  -> start_to_4k (start address up to the page end)
    --   last of many   -> end_from_4k (page start up to the request end)
    --   middle section -> BOUNDARY_BEATS (a whole page)
    sect_len   <= beat_len(11-ADDR_ALIGN downto 0) when     single_sect else
                  start_to_4k                      when     first_sect and not last_sect else
                  end_from_4k                      when not first_sect and     last_sect else
                  BOUNDARY_BEATS;
                          
    -- Section bookkeeping for the request currently being handled.
    -- On next_req: start at the first section, load the 4 KB page number of
    -- the request address into sect_cnt, and load beat_len from
    -- (length + sub-beat address offset) shifted right by ADDR_ALIGN.
    -- On next_sect (only when no new request is popped in the same cycle):
    -- move to the next page, flag the last section when one section remains,
    -- and subtract the issued section from beat_len.
    -- NOTE(review): the extra "- 1" suggests sect_len is stored as
    -- beats-minus-one; confirm against the BOUNDARY_BEATS declaration.
    process (clk)
    begin
        if (clk'event and clk = '1') then
            if (reset = '1') then
                first_sect <= false;
                last_sect <= false;
                sect_cnt <= (others => '0');
                beat_len <= (others => '0');
            elsif clk_en = '1' then
                if next_req = '1' then
                    first_sect <= true;
                    last_sect <= false;
                    sect_cnt <= req_addr_tmp(ADDR_WIDTH - 1 downto 12);
                    beat_len <= SHIFT_RIGHT(req_len_tmp + req_addr_tmp(ADDR_ALIGN-1 downto 0), ADDR_ALIGN);
                elsif next_sect then
                    first_sect <= false;
                    last_sect <= (sect_total_tmp = 1);
                    sect_cnt <= sect_cnt + 1;
                    beat_len <= beat_len - sect_len - 1;
                end if;
            end if;
        end if;
    end process;

    -- Section output registers: whenever a section is issued (next_sect) its
    -- ID, address, length, last flag and beat count are captured, and the
    -- remaining-section counter is decremented.
    process (clk)
    begin
        if clk'event and clk = '1' then
            if reset = '1' then
                sect_total_buf <= (others => '0');
                beat_len_buf   <= (others => '0');
                last_sect_buf  <= false;
                sect_len_buf   <= (others => '0');
                sect_addr_buf  <= (others => '0');
                req_id_buf     <= (others => '0');
            elsif clk_en = '1' and next_sect then
                sect_total_buf <= sect_total_tmp - 1;
                beat_len_buf   <= beat_len;
                last_sect_buf  <= last_sect_tmp;
                sect_len_buf   <= sect_len;
                sect_addr_buf  <= sect_addr;
                req_id_buf     <= req_id;
            end if;
        end if;
    end process; 

    -- The internal control signals (driven inside the generate blocks below)
    -- are also used by the req_ready process, so they are kept as signals and
    -- copied to the output ports here.
    out_CTRL_VALID      <= ost_ctrl_valid;
    out_CTRL_ID         <= ost_ctrl_id;
    out_CTRL_INFO       <= ost_ctrl_info;

    -- One burst per 4 KB section.  Selected when DATA_BYTES is at least
    -- 4096/MAX_BURST_LENGTH, i.e. a maximum-length burst covers a whole
    -- 4 KB section, so each section is emitted as exactly one burst.
    must_one_burst : if (DATA_BYTES >= 4096/MAX_BURST_LENGTH) generate
        -- A section is being considered this cycle (before the per-ID control
        -- ready check).
        signal  read_sect    : BOOLEAN;
    begin
        -- Burst outputs come straight from the section registers.
        out_BURST_ID    <= req_id_buf;
        out_BURST_ADDR  <= sect_addr_buf;
        out_BURST_LEN   <= RESIZE(sect_len_buf, 8);
        out_BURST_VALID <= burst_valid;

        -- Control beat: one per issued section; info is "1" on the final
        -- section of the request.
        ost_ctrl_id     <= req_id;
        ost_ctrl_info   <= "1" when last_sect_tmp else "0";
        ost_ctrl_valid  <= '1' when next_sect else '0';
        -- Control ready of the port that owns the current request.
        ost_ctrl_ready  <= in_CTRL_READY(TO_INTEGER(req_id));

        -- A section is issued only if the owning port's control side is ready.
        next_sect       <= read_sect and ost_ctrl_ready = '1';
        -- Sections may be considered when the burst output register is free
        -- (or being drained), the request buffer has space, and at least one
        -- port's control side is ready.
        ready_for_sect  <= not (burst_valid = '1' and in_BURST_READY = '0') and req_full_n = '1' and or_reduce(in_CTRL_READY) = '1';
        read_sect       <= req_handling and ready_for_sect;
        -- Pop the next request when idle or whenever a section is considered.
        read_req        <= '1' when not req_handling or ready_for_sect else '0';

        -- Burst valid register: set when a section is issued, cleared when the
        -- downstream accepts it (issue has priority).
        process (clk)
        begin
            if (clk'event and clk = '1') then
                if (reset = '1') then
                    burst_valid <= '0';
                elsif clk_en = '1' then
                    if next_sect then
                        burst_valid <= '1';
                    elsif in_BURST_READY = '1' then
                        burst_valid <= '0';
                    end if;
                end if;
            end if;
        end process;

        -- calculate remaining request, for interleaved burst handling.
        -- The remainder is re-packed in the same ID/LEN/ADDR layout as an
        -- incoming request; the beat count is widened back by appending
        -- ADDR_ALIGN one-bits below it.
        rem_req_pack  <= rem_req_id & (rem_req_len(LEN_WIDTH-ADDR_ALIGN-1 downto 0) & (ADDR_ALIGN-1 downto 0 => '1')) & rem_req_addr;
        -- If the section is actually issued (control ready) the remainder
        -- starts at the next 4 KB page with the section subtracted; otherwise
        -- the remainder is the untouched current section.
        rem_req_addr_pred <= ((sect_cnt+1) & (11 downto 0 => '0')) when ost_ctrl_ready = '1' else sect_addr;
        rem_req_len_pred  <= (beat_len - sect_len - 1) when ost_ctrl_ready = '1' else beat_len;

        -- Remainder request registers, captured whenever a section is considered.
        process (clk)
        begin
            if (clk'event and clk = '1') then
                if (reset = '1') then
                    rem_req_id   <= (others => '0'); 
                    rem_req_addr <= (others => '0');
                    rem_req_len  <= (others => '0');
                elsif clk_en = '1' then
                    if read_sect then
                        rem_req_id   <= req_id;
                        rem_req_addr <= rem_req_addr_pred;
                        rem_req_len  <= rem_req_len_pred;
                    end if;
                end if;
            end if;
        end process;

        -- Remainder valid flag, in priority order:
        --   1. the final section was issued  -> nothing remains, clear
        --   2. a section was considered while another request is waiting in
        --      the buffer                    -> remainder must be re-queued, set
        --   3. the buffer has space          -> clear (with the flag set,
        --      write_req is asserted and req_pack_in selects rem_req_pack)
        process (clk)
        begin
            if (clk'event and clk = '1') then
                if (reset = '1') then
                    rem_req_valid <= '0';
                elsif clk_en = '1' then
                    if next_sect and last_sect_tmp then
                        rem_req_valid <= '0';
                    elsif req_empty_n = '1' and read_sect then
                        rem_req_valid <= '1';
                    elsif req_full_n = '1' then
                        rem_req_valid <= '0';
                    end if;
                end if;
            end if;
        end process;
    end generate must_one_burst;

    -- Several bursts per 4 KB section.  Selected when DATA_BYTES is smaller
    -- than 4096/MAX_BURST_LENGTH, so a section may have to be cut into several
    -- bursts; each burst is one "loop" iteration below.
    could_multi_bursts : if (DATA_BYTES < 4096/MAX_BURST_LENGTH) generate
        -- Registered burst currently presented on the out_BURST_* ports.
        signal  burst_id        : UNSIGNED(ID_WIDTH-1 downto 0); 
        signal  burst_addr      : UNSIGNED(ADDR_WIDTH - 1 downto 0);
        -- Address of the burst about to be issued.
        signal  burst_addr_next : UNSIGNED(ADDR_WIDTH - 1 downto 0);
        -- Precomputed address of the burst following the registered one.
        signal  burst_addr_pred : UNSIGNED(ADDR_WIDTH - 1 downto 0);
        signal  burst_len       : UNSIGNED(7 downto 0);
        -- Precomputed length of the burst about to be issued, and that
        -- length plus one (9 bits so the increment cannot wrap).
        signal  burst_len_pred  : UNSIGNED(7 downto 0);
        signal  burst_len_pred_plus1 : UNSIGNED(8 downto 0);

        -- Burst counter within the current section (counts down).
        signal  loop_cnt        : UNSIGNED(11 - NUM_BEAT_WIDTH - ADDR_ALIGN downto 0);
        signal  first_loop      : BOOLEAN;
        signal  last_loop       : BOOLEAN;
        signal  next_loop       : BOOLEAN;
        signal  read_loop       : BOOLEAN;
        signal  ready_for_loop  : BOOLEAN;
        signal  sect_handling   : BOOLEAN;
        -- A newly popped request is waiting to start its first section.
        signal  next_req_ready  : BOOLEAN;

        signal  last_loop_when_next_loop : BOOLEAN;
        signal  last_loop_when_next_sect : BOOLEAN;
        signal  burst_len_when_next_loop : UNSIGNED(7 downto 0); 
        signal  burst_len_when_next_sect : UNSIGNED(7 downto 0); 
    begin
        out_BURST_ID    <= burst_id;
        out_BURST_ADDR  <= burst_addr;
        out_BURST_LEN   <= burst_len;
        out_BURST_VALID <= burst_valid;

        -- Control beat: one per issued burst; info is "1" on the last burst
        -- of the last section of the request.
        ost_ctrl_id     <= req_id_buf;
        ost_ctrl_info   <= "1" when last_sect_buf and last_loop else "0";
        ost_ctrl_valid  <= '1' when next_loop else '0';
        -- Control ready of the port that owns the buffered section.
        ost_ctrl_ready  <= in_CTRL_READY(TO_INTEGER(req_id_buf));
        
        -- Pop a request when none is waiting or when a section can start.
        read_req        <= '1' when not next_req_ready or ready_for_sect else '0';

        next_sect       <= req_handling and ready_for_sect;
        -- A new section may start when no section is active, when a burst is
        -- being considered while a new request is waiting, or when the last
        -- burst of the active section is issued.
        ready_for_sect  <= not sect_handling or (read_loop and next_req_ready) or (next_loop and last_loop);
        
        -- A burst is issued only if the owning port's control side is ready.
        next_loop       <= read_loop and ost_ctrl_ready = '1';
        read_loop       <= sect_handling and ready_for_loop;
        -- Bursts may be considered when the burst output register is free (or
        -- being drained), the request buffer has space, and at least one
        -- port's control side is ready.
        ready_for_loop  <= not (burst_valid = '1' and in_BURST_READY = '0') and req_full_n = '1' and or_reduce(in_CTRL_READY) = '1'; 

        -- Burst valid register: set when a burst is issued, cleared when the
        -- downstream accepts it (issue has priority).
        process (clk)
        begin
            if (clk'event and clk = '1') then
                if (reset = '1') then
                    burst_valid <= '0';
                elsif clk_en = '1' then
                    if next_loop then
                        burst_valid <= '1';
                    elsif in_BURST_READY = '1' then
                        burst_valid <= '0';
                    end if;
                end if;
            end if;
        end process;

        -- sect_handling: true while a section is being cut into bursts.
        -- Set one cycle after req_handling becomes true; cleared when the last
        -- burst is issued and no request is being handled.
        process (clk)
        begin
            if (clk'event and clk = '1') then
                if (reset = '1') then
                    sect_handling <= false;
                elsif clk_en = '1' then
                    if req_handling and not sect_handling then
                        sect_handling <= true;
                    elsif not req_handling and last_loop and next_loop then
                        sect_handling <= false;
                    end if;
                end if;
            end if;
        end process;

        -- Burst position inside the section.  A new section (priority) loads
        -- loop_cnt with the upper bits of sect_len; every issued burst
        -- decrements it.
        process (clk)
        begin
            if (clk'event and clk = '1') then
                if (reset = '1') then
                    first_loop <= false;
                    last_loop  <= false;
                    loop_cnt   <= (others => '0');
                elsif clk_en = '1' then
                    if next_sect then
                        first_loop <= true;
                        last_loop  <= last_loop_when_next_sect;
                        loop_cnt   <= sect_len(11 - ADDR_ALIGN downto NUM_BEAT_WIDTH);
                    elsif next_loop then
                        first_loop <= false;
                        last_loop  <= last_loop_when_next_loop;
                        loop_cnt   <= loop_cnt - 1;
                    end if;
                end if;
            end if;
        end process;

        -- Next value of last_loop for each of the two update cases above.
        last_loop_when_next_sect <= (sect_len(11 - ADDR_ALIGN downto NUM_BEAT_WIDTH) = 0);
        last_loop_when_next_loop <= (loop_cnt = 1);

        -- First burst of a section starts at the section address, later ones
        -- at the precomputed follow-on address.
        burst_addr_next          <= sect_addr_buf when first_loop else burst_addr_pred;
        -- Length of the upcoming burst: the low NUM_BEAT_WIDTH bits of the
        -- section length for the last burst of a section, otherwise
        -- 2**NUM_BEAT_WIDTH-1; always zero when NUM_BEAT_WIDTH = 0.
        burst_len_when_next_sect <= (others => '0')                                  when (NUM_BEAT_WIDTH = 0)     else
                                    RESIZE(sect_len(NUM_BEAT_WIDTH-1 downto 0), 8)   when last_loop_when_next_sect else 
                                    TO_UNSIGNED(2**NUM_BEAT_WIDTH-1, 8);
        burst_len_when_next_loop <= (others => '0')                                    when (NUM_BEAT_WIDTH = 0)     else
                                    RESIZE(sect_len_buf(NUM_BEAT_WIDTH-1 downto 0), 8) when last_loop_when_next_loop else 
                                    TO_UNSIGNED(2**NUM_BEAT_WIDTH-1, 8);

        -- Remainder request, re-packed in the same ID/LEN/ADDR layout as an
        -- incoming request; the beat count is widened back by appending
        -- ADDR_ALIGN one-bits below it.
        rem_req_pack <= rem_req_id & (rem_req_len(LEN_WIDTH-ADDR_ALIGN-1 downto 0) & (ADDR_ALIGN-1 downto 0 => '1')) & rem_req_addr;
        -- Predicted remainder address/length.  If the burst is issued (control
        -- ready) the remainder is advanced past it, otherwise it is unchanged.
        -- The base value is the section start on the first burst and the
        -- running value afterwards.
        rem_req_addr_pred <= sect_addr_buf   + (burst_len_pred_plus1 & (ADDR_ALIGN-1 downto 0 => '0')) when ost_ctrl_ready = '1' and first_loop else
                             burst_addr_pred + (burst_len_pred_plus1 & (ADDR_ALIGN-1 downto 0 => '0')) when ost_ctrl_ready = '1' and not first_loop else
                             sect_addr_buf                                                             when ost_ctrl_ready = '0' and first_loop else
                             burst_addr_pred;
        rem_req_len_pred  <= beat_len_buf - burst_len_pred_plus1 when ost_ctrl_ready = '1' and first_loop else
                             rem_req_len  - burst_len_pred_plus1 when ost_ctrl_ready = '1' and not first_loop else
                             beat_len_buf                        when ost_ctrl_ready = '0' and first_loop else
                             rem_req_len;

        -- Registered length prediction for the upcoming burst; a new section
        -- has priority over a burst step.  Reset value is a full-length burst.
        process (clk)
        begin
            if (clk'event and clk = '1') then
                if (reset = '1') then
                    burst_len_pred       <= TO_UNSIGNED(2**NUM_BEAT_WIDTH-1, 8);
                    burst_len_pred_plus1 <= TO_UNSIGNED(2**NUM_BEAT_WIDTH, 9);
                elsif clk_en = '1' then
                    if next_sect then
                        burst_len_pred       <= burst_len_when_next_sect;
                        burst_len_pred_plus1 <= ('0' & burst_len_when_next_sect) + 1;
                    elsif next_loop then
                        burst_len_pred       <= burst_len_when_next_loop;
                        burst_len_pred_plus1 <= ('0' & burst_len_when_next_loop) + 1;
                    end if;
                end if;
            end if;
        end process;

        -- Burst output registers, loaded when a burst is issued.
        -- burst_addr_pred advances by (length + 1) beats, converted to bytes
        -- by appending ADDR_ALIGN zero bits.
        process (clk)
        begin
            if (clk'event and clk = '1') then
                if (reset = '1') then
                    burst_id   <= (others => '0');
                    burst_addr <= (others => '0');
                    burst_len  <= (others => '0');
                    burst_addr_pred <= (others => '0');
                elsif clk_en = '1' then
                    if next_loop then
                        burst_id   <= req_id_buf;
                        burst_addr <= burst_addr_next;
                        burst_len  <= burst_len_pred;
                        burst_addr_pred <= burst_addr_next + (burst_len_pred_plus1 & (ADDR_ALIGN-1 downto 0 => '0'));
                    end if;
                end if;
            end if;
        end process;

        -- Remainder request registers, captured whenever a burst is considered.
        process (clk)
        begin
            if (clk'event and clk = '1') then
                if (reset = '1') then
                    rem_req_id   <= (others => '0');
                    rem_req_addr <= (others => '0');
                    rem_req_len  <= (others => '0');
                elsif clk_en = '1' then
                    if read_loop then
                        rem_req_id   <= req_id_buf;
                        rem_req_addr <= rem_req_addr_pred;
                        rem_req_len  <= rem_req_len_pred;
                    end if;
                end if;
            end if;
        end process;

        -- next_req_ready: set when a request is popped, cleared when its
        -- first section starts (the pop has priority).
        process (clk)
        begin
            if (clk'event and clk = '1') then
                if (reset = '1') then
                    next_req_ready   <= false;
                elsif clk_en = '1' then
                    if next_req = '1' then
                        next_req_ready   <= true;
                    elsif next_sect then
                        next_req_ready   <= false;
                    end if;
                end if;
            end if;
        end process;

        -- Remainder valid flag, in priority order:
        --   1. the last burst of the last section was issued -> clear
        --   2. a burst was considered while a new request is waiting
        --                                                     -> set (re-queue)
        --   3. the buffer has space                           -> clear (with the
        --      flag set, write_req is asserted and req_pack_in selects
        --      rem_req_pack)
        process (clk)
        begin
            if (clk'event and clk = '1') then
                if (reset = '1') then
                    rem_req_valid <= '0';
                elsif clk_en = '1' then
                    if next_loop and last_loop and last_sect_buf then
                        rem_req_valid <= '0';
                    elsif next_req_ready and read_loop then
                        rem_req_valid <= '1';
                    elsif req_full_n = '1' then
                        rem_req_valid <= '0';
                    end if;
                end if;
            end if;
        end process; 
    end generate could_multi_bursts;
end architecture behave;


library IEEE;
use IEEE.STD_LOGIC_1164.all;
use IEEE.NUMERIC_STD.all;

-- Burst converter that handles one request at a time: each incoming request
-- (ID, address, length) is cut at 4 KB address boundaries into sections, and
-- each section into one or more bursts presented on out_BURST_*.  A control
-- beat (out_CTRL_*) accompanies every issued burst.
entity s2mm_gmem_m_axi_burst_sequential is
    generic (
        -- Width of the request / burst / control ID fields.
        ID_WIDTH          : INTEGER := 1;
        -- Data bus width in bits; DATA_WIDTH/8 is the number of bytes per beat.
        DATA_WIDTH        : INTEGER := 32;
        -- Width of request and burst addresses.
        ADDR_WIDTH        : INTEGER := 32;
        -- Width of the request length field.
        LEN_WIDTH         : INTEGER := 32;
        -- Maximum number of beats in one burst.
        MAX_BURST_LENGTH  : INTEGER := 16;
        -- Number of bits in out_REQ_READY and in_CTRL_READY.
        NUM_PORTS         : INTEGER := 1);
    port (
        clk               : in  STD_LOGIC;
        -- Synchronous reset, active high.
        reset             : in  STD_LOGIC;
        -- Clock enable for the logic in this architecture (the request
        -- register slice has no clock-enable port).
        clk_en            : in  STD_LOGIC;
        -- Incoming request.
        in_REQ_ID         : in  UNSIGNED(ID_WIDTH-1 downto 0);
        in_REQ_ADDR       : in  UNSIGNED(ADDR_WIDTH-1 downto 0);
        in_REQ_LEN        : in  UNSIGNED(LEN_WIDTH-1 downto 0);
        in_REQ_VALID      : in  STD_LOGIC;
        -- Every bit carries the same value: the request buffer can accept data.
        out_REQ_READY     : out UNSIGNED(NUM_PORTS-1 downto 0);
        -- Outgoing burst (valid/ready handshake with in_BURST_READY).
        out_BURST_ID      : out UNSIGNED(ID_WIDTH-1 downto 0);
        out_BURST_ADDR    : out UNSIGNED(ADDR_WIDTH-1 downto 0);
        out_BURST_LEN     : out UNSIGNED(7 downto 0);
        out_BURST_VALID   : out STD_LOGIC;
        in_BURST_READY    : in  STD_LOGIC;
        -- Control beat issued together with each burst; out_CTRL_INFO is "1"
        -- on the final burst of a request.
        out_CTRL_ID       : out UNSIGNED(ID_WIDTH-1 downto 0);
        out_CTRL_INFO     : out UNSIGNED(0 downto 0);
        out_CTRL_VALID    : out STD_LOGIC;
        -- Per-port control ready, indexed by the ID of the current request.
        in_CTRL_READY     : in  UNSIGNED(NUM_PORTS-1 downto 0));

end entity s2mm_gmem_m_axi_burst_sequential;

architecture behave of s2mm_gmem_m_axi_burst_sequential is

    -- Returns the smallest n such that 2**n >= x (0 for any x <= 1).
    function log2 (x : INTEGER) return INTEGER is
        variable exponent : INTEGER := 0;  -- result candidate
        variable power    : INTEGER := 1;  -- always 2**exponent
    begin
        while power < x loop
            power    := power * 2;
            exponent := exponent + 1;
        end loop;
        return exponent;
    end function log2;

    --common
    -- Width of a packed request: ID & LEN & ADDR.
    constant PACK_WIDTH           : INTEGER := ID_WIDTH + ADDR_WIDTH + LEN_WIDTH;
    -- Bytes per data beat.
    constant DATA_BYTES           : INTEGER := DATA_WIDTH / 8;
    -- Number of low address bits that select a byte inside one beat.
    constant ADDR_ALIGN           : INTEGER := log2(DATA_BYTES);
    -- All-ones value over the beat-index bits of a 4 KB page; used as the
    -- section length of a section that covers a whole page.
    constant BOUNDARY_BEATS       : UNSIGNED(11-ADDR_ALIGN downto 0) := (others => '1');
    -- Number of bits needed to count the beats of a maximum-length burst.
    constant NUM_BEAT_WIDTH       : INTEGER := log2(MAX_BURST_LENGTH);

    --local signals
    -- Packed request into / out of the request register slice, and the
    -- unpacked fields of the output.
    signal  req_pack_in           : UNSIGNED(PACK_WIDTH-1 downto 0);
    signal  req_pack_out          : UNSIGNED(PACK_WIDTH-1 downto 0);
    signal  req_id_tmp            : UNSIGNED(ID_WIDTH-1 downto 0);
    signal  req_addr_tmp          : UNSIGNED(ADDR_WIDTH-1 downto 0);
    signal  req_len_tmp           : UNSIGNED(LEN_WIDTH-1 downto 0);
    
    -- Request register slice handshake.
    signal  req_full_n            : STD_LOGIC;
    signal  req_empty_n           : STD_LOGIC;
    signal  write_req             : STD_LOGIC;
    signal  read_req              : STD_LOGIC;
    -- A request is taken from the register slice this cycle.
    signal  next_req              : STD_LOGIC;

    -- Addresses and IDs: latched request values and their per-section copies.
    signal  start_addr            : UNSIGNED(ADDR_WIDTH-1 downto 0);
    signal  sect_addr             : UNSIGNED(ADDR_WIDTH-1 downto 0);
    signal  sect_addr_buf         : UNSIGNED(ADDR_WIDTH-1 downto 0);
    signal  req_id                : UNSIGNED(ID_WIDTH-1 downto 0);
    signal  req_id_buf            : UNSIGNED(ID_WIDTH-1 downto 0);

    -- Lengths expressed over the beat-index bits of a 4 KB page.
    signal  beat_len              : UNSIGNED(11-ADDR_ALIGN downto 0);
    signal  start_to_4k           : UNSIGNED(11-ADDR_ALIGN downto 0);
    signal  end_from_4k           : UNSIGNED(11-ADDR_ALIGN downto 0);
    signal  sect_len              : UNSIGNED(11-ADDR_ALIGN downto 0);
    signal  sect_len_buf          : UNSIGNED(11-ADDR_ALIGN downto 0);
    -- 4 KB page number of the current section (address bits above bit 11).
    signal  sect_cnt              : UNSIGNED(ADDR_WIDTH-13 downto 0);
    -- Section counters derived from (length + page offset) / 4096.
    signal  sect_total            : UNSIGNED(LEN_WIDTH-13 downto 0);
    signal  sect_total_buf        : UNSIGNED(LEN_WIDTH-13 downto 0);
    signal  sect_total_tmp        : UNSIGNED(LEN_WIDTH-13 downto 0);

    -- Request / section state flags.
    signal  req_handling          : BOOLEAN;
    signal  single_sect           : BOOLEAN;
    signal  first_sect            : BOOLEAN;
    signal  last_sect             : BOOLEAN;
    signal  last_sect_buf         : BOOLEAN;
    signal  last_sect_tmp         : BOOLEAN;
    signal  ready_for_sect        : BOOLEAN;
    signal  next_sect             : BOOLEAN;

    -- Burst output valid register and the control ready of the current ID.
    signal  burst_valid           : STD_LOGIC;
    signal  ost_ctrl_ready        : STD_LOGIC;

    -- Component declaration of the register slice instantiated below as
    -- rs_req.  It has a slave side (s_*) and a master side (m_*), each with
    -- data/valid/ready, and no clock-enable port.
    -- NOTE(review): the implementation is not visible here; presumably a
    -- valid/ready pipeline stage - confirm against the component's own entity.
    component s2mm_gmem_m_axi_reg_slice is
        generic (
            -- Width in bits of s_data and m_data.
            DATA_WIDTH  : INTEGER := 8);
        port (
            clk         : in  STD_LOGIC;
            reset       : in  STD_LOGIC;
            -- slave (input) side
            s_data      : in  UNSIGNED(DATA_WIDTH-1 downto 0);
            s_valid     : in  STD_LOGIC;
            s_ready     : out STD_LOGIC;
            -- master (output) side
            m_data      : out UNSIGNED(DATA_WIDTH-1 downto 0);
            m_valid     : out STD_LOGIC;
            m_ready     : in  STD_LOGIC);
    end component s2mm_gmem_m_axi_reg_slice;

begin
    --------------------------- Request channel begin ------------------------------
    -- Request register slice instantiation
    -- Register slice that holds the packed request (ID & LEN & ADDR) until the
    -- section logic pops it with read_req.
    rs_req : s2mm_gmem_m_axi_reg_slice
        generic map (
            DATA_WIDTH => PACK_WIDTH)
        port map (
            clk     => clk,
            reset   => reset,
            -- input side: packed request in
            s_valid => write_req,
            s_data  => req_pack_in,
            s_ready => req_full_n,
            -- output side: packed request out
            m_valid => req_empty_n,
            m_data  => req_pack_out,
            m_ready => read_req);

    -- Upstream ready: every port bit mirrors the register slice's s_ready.
    out_REQ_READY      <= (others => req_full_n);
    -- Packing order is ID (MSBs), LEN, ADDR (LSBs).
    req_pack_in        <= in_REQ_ID & in_REQ_LEN & in_REQ_ADDR;
    write_req          <= in_REQ_VALID;

    -- Unpack the buffered request using the same field order.
    req_addr_tmp       <= req_pack_out(ADDR_WIDTH-1  downto 0);
    req_len_tmp        <= req_pack_out(ADDR_WIDTH+LEN_WIDTH-1 downto ADDR_WIDTH);
    req_id_tmp         <= req_pack_out(PACK_WIDTH-1  downto ADDR_WIDTH+LEN_WIDTH);

    -- Pop the next request when idle, or in the same cycle the last section
    -- of the current request is issued.
    read_req           <= '1' when not req_handling or (last_sect_tmp and next_sect) else '0';
    -- A request is actually taken from the register slice this cycle.
    next_req           <= read_req and req_empty_n;

    -- Latch the parameters of the request popped from the register slice
    -- (next_req):
    --   req_id      : request ID
    --   start_addr  : request address with the low ADDR_ALIGN bits cleared
    --   beat_len    : beat index of (length + sub-beat address offset)
    --   end_from_4k : beat index, within its page, of (length + page offset)
    --   start_to_4k : BOUNDARY_BEATS minus the beat index of the start address
    --                 within its 4 KB page
    --   sect_total  : (length + page offset) / 4096
    process (clk)
        variable page_off : UNSIGNED(11 downto 0);  -- byte offset inside the 4 KB page
        variable len_lo   : UNSIGNED(11 downto 0);  -- low 12 bits of the length field
    begin
        if clk'event and clk = '1' then
            if reset = '1' then
                sect_total  <= (others => '0');
                end_from_4k <= (others => '0');
                start_to_4k <= (others => '0');
                beat_len    <= (others => '0');
                start_addr  <= (others => '0');
                req_id      <= (others => '0');
            elsif clk_en = '1' and next_req = '1' then
                page_off := req_addr_tmp(11 downto 0);
                len_lo   := req_len_tmp(11 downto 0);

                req_id      <= req_id_tmp;
                start_addr  <= req_addr_tmp(ADDR_WIDTH-1 downto ADDR_ALIGN) & (ADDR_ALIGN-1 downto 0 => '0');
                beat_len    <= RESIZE(SHIFT_RIGHT(len_lo + page_off(ADDR_ALIGN-1 downto 0), ADDR_ALIGN), 12-ADDR_ALIGN);
                end_from_4k <= RESIZE(SHIFT_RIGHT(len_lo + page_off, ADDR_ALIGN), 12-ADDR_ALIGN);
                start_to_4k <= BOUNDARY_BEATS - page_off(11 downto ADDR_ALIGN);
                sect_total  <= RESIZE(SHIFT_RIGHT(req_len_tmp + page_off, 12), LEN_WIDTH-12);
            end if;
        end if;
    end process;

    -- req_handling flag: true while a request is being split into sections.
    -- Set when a request is popped (next_req).  Cleared when the last section
    -- of the current request is issued and the register slice holds no further
    -- request.  The set condition has priority over the clear.
    process (clk)
    begin
        if (clk'event and clk = '1') then
            if (reset = '1') then
                req_handling <= false;
            elsif clk_en = '1' then
                if next_req = '1' then
                    req_handling <= true;
                elsif req_empty_n = '0' and last_sect_tmp and next_sect then
                    req_handling <= false;
                end if;
            end if;
        end if;
    end process;

    -- 4k boundary section
    -- The current section is the final one of the request: either the request
    -- fits in a single section or the registered last_sect flag is set.
    last_sect_tmp  <= single_sect or last_sect;

    -- Remaining-section count: the freshly latched total for the first
    -- section, the decremented copy afterwards.
    sect_total_tmp <= sect_total when first_sect else sect_total_buf;

    -- The request does not cross a 4 KB boundary.
    single_sect <= (sect_total = 0);

    -- next_sect is not driven here: each of the must_one_burst and
    -- could_multi_bursts generate blocks drives it with its own condition.

    -- Section start address: the aligned request address for the first
    -- section, otherwise the start of the 4 KB page numbered sect_cnt.
    sect_addr  <= start_addr when first_sect else
                  sect_cnt & (11 downto 0 => '0');
    -- Section length, selected by the position of the section in the request:
    --   only section   -> beat_len
    --   first of many  -> start_to_4k (start address up to the page end)
    --   last of many   -> end_from_4k (page start up to the request end)
    --   middle section -> BOUNDARY_BEATS (a whole page)
    sect_len   <= beat_len     when     single_sect else
                  start_to_4k  when     first_sect and not last_sect else
                  end_from_4k  when not first_sect and     last_sect else
                  BOUNDARY_BEATS;
    
    -- Section bookkeeping for the request currently being handled.
    -- On next_req: start at the first section and load the 4 KB page number
    -- of the request address into sect_cnt.
    -- On next_sect (only when no new request is popped in the same cycle):
    -- move to the next page and flag the last section when exactly one
    -- section remains.
    process (clk)
    begin
        if (clk'event and clk = '1') then
            if (reset = '1') then
                first_sect <= false;
                last_sect <= false;
                sect_cnt <= (others => '0');
            elsif clk_en = '1' then
                if next_req = '1' then
                    first_sect <= true;
                    last_sect <= false;
                    sect_cnt <= req_addr_tmp(ADDR_WIDTH - 1 downto 12);
                elsif next_sect then
                    first_sect <= false;
                    last_sect <= (sect_total_tmp = 1);
                    sect_cnt <= sect_cnt + 1;
                end if;
            end if;
        end if;
    end process;

    -- Section output registers: whenever a section is issued (next_sect) its
    -- ID, address, length and last flag are captured, and the
    -- remaining-section counter is decremented.
    process (clk)
    begin
        if clk'event and clk = '1' then
            if reset = '1' then
                sect_total_buf <= (others => '0');
                last_sect_buf  <= false;
                sect_len_buf   <= (others => '0');
                sect_addr_buf  <= (others => '0');
                req_id_buf     <= (others=>'0');
            elsif clk_en = '1' and next_sect then
                sect_total_buf <= sect_total_tmp - 1;
                last_sect_buf  <= last_sect_tmp;
                sect_len_buf   <= sect_len;
                sect_addr_buf  <= sect_addr;
                req_id_buf     <= req_id;
            end if;
        end if;
    end process; 

    -- One burst per 4 KB section.  Selected when DATA_BYTES is at least
    -- 4096/MAX_BURST_LENGTH, i.e. a maximum-length burst covers a whole
    -- 4 KB section, so each section is emitted as exactly one burst.
    must_one_burst : if (DATA_BYTES >= 4096/MAX_BURST_LENGTH) generate
        -- A section is being considered this cycle (before the per-ID control
        -- ready check).
        signal read_sect : BOOLEAN;
    begin
        -- Burst outputs come straight from the section registers.
        out_BURST_ID    <= req_id_buf;
        out_BURST_ADDR  <= sect_addr_buf;
        out_BURST_LEN   <= RESIZE(sect_len_buf, 8);
        out_BURST_VALID <= burst_valid;

        -- Control beat: one per issued section; info is "1" on the final
        -- section of the request.
        out_CTRL_ID     <= req_id;
        out_CTRL_INFO   <= "1" when last_sect_tmp else "0";
        out_CTRL_VALID  <= '1' when next_sect else '0';
        -- Control ready of the port that owns the current request.
        ost_ctrl_ready  <= in_CTRL_READY(TO_INTEGER(req_id));

        -- A section is issued when a request is active, the burst output
        -- register is free (or being drained this cycle) and the owning port's
        -- control side is ready.
        next_sect       <= read_sect and (ost_ctrl_ready = '1');
        read_sect       <= req_handling and ready_for_sect;
        ready_for_sect  <= not (burst_valid = '1' and in_BURST_READY = '0');

        -- Burst valid register: set when a section is issued, cleared when the
        -- downstream accepts it (issue has priority).
        process (clk)
        begin
            if (clk'event and clk = '1') then
                if (reset = '1') then
                    burst_valid <= '0';
                elsif clk_en = '1' then
                    if next_sect then
                        burst_valid <= '1';
                    elsif in_BURST_READY = '1' then
                        burst_valid <= '0';
                    end if;
                end if;
            end if;
        end process;
    end generate must_one_burst;

    -- Several bursts per 4 KB section.  Selected when DATA_BYTES is smaller
    -- than 4096/MAX_BURST_LENGTH, so a section may have to be cut into several
    -- bursts; each burst is one "loop" iteration below.
    could_multi_bursts : if (DATA_BYTES < 4096/MAX_BURST_LENGTH) generate
        -- Registered burst currently presented on the out_BURST_* ports.
        signal  burst_id        : UNSIGNED(ID_WIDTH-1 downto 0);
        signal  burst_addr      : UNSIGNED(ADDR_WIDTH - 1 downto 0);
        -- Address of the burst about to be issued.
        signal  burst_addr_next : UNSIGNED(ADDR_WIDTH - 1 downto 0);
        signal  burst_len       : UNSIGNED(7 downto 0);
        -- Registered burst length plus one (9 bits so the increment cannot
        -- wrap); used to step the address past the registered burst.
        signal  burst_len_plus1 : UNSIGNED(8 downto 0);
        -- Length of the burst about to be issued.
        signal  burst_len_next  : UNSIGNED(7 downto 0);
        -- Burst counter within the current section (counts down).
        signal  loop_cnt        : UNSIGNED(11 - NUM_BEAT_WIDTH - ADDR_ALIGN downto 0);
        signal  first_loop      : BOOLEAN;
        signal  last_loop       : BOOLEAN;
        signal  next_loop       : BOOLEAN;
        signal  read_loop       : BOOLEAN;
        signal  ready_for_loop  : BOOLEAN;
        signal  sect_handling   : BOOLEAN;
    begin
        out_BURST_ID    <= burst_id;
        out_BURST_ADDR  <= burst_addr;
        out_BURST_LEN   <= burst_len;
        out_BURST_VALID <= burst_valid;

        -- Control beat: one per issued burst; info is "1" on the last burst
        -- of the last section of the request.
        out_CTRL_ID     <= req_id_buf;
        out_CTRL_INFO   <= "1" when last_sect_buf and last_loop else "0";
        out_CTRL_VALID  <= '1' when next_loop else '0';
        -- Control ready of the port that owns the buffered section.
        ost_ctrl_ready  <= in_CTRL_READY(TO_INTEGER(req_id_buf));

        -- A burst is issued when a section is active, the burst output
        -- register is free (or being drained this cycle) and the owning port's
        -- control side is ready.
        next_loop       <= read_loop and (ost_ctrl_ready = '1');
        read_loop       <= sect_handling and ready_for_loop;
        ready_for_loop  <= not (burst_valid = '1' and in_BURST_READY = '0');

        -- A new section starts when none is active or when the last burst of
        -- the active section is issued.
        next_sect       <= req_handling and ready_for_sect;
        ready_for_sect  <= not sect_handling or (last_loop and next_loop);

        -- Burst valid register: set when a burst is issued, cleared when the
        -- downstream accepts it (issue has priority).
        process (clk)
        begin
            if (clk'event and clk = '1') then
                if (reset = '1') then
                    burst_valid <= '0';
                elsif clk_en = '1' then
                    if next_loop then
                        burst_valid <= '1';
                    elsif in_BURST_READY = '1' then
                        burst_valid <= '0';
                    end if;
                end if;
            end if;
        end process;

        -- sect_handling: true while a section is being cut into bursts.
        -- Set one cycle after req_handling becomes true; cleared when the last
        -- burst is issued and no request is being handled.
        process (clk)
        begin
            if (clk'event and clk = '1') then
                if (reset = '1') then
                    sect_handling <= false;
                elsif clk_en = '1' then
                    if req_handling and not sect_handling then
                        sect_handling <= true;
                    elsif not req_handling and last_loop and next_loop then
                        sect_handling <= false;
                    end if;
                end if;
            end if;
        end process;

        -- Burst position inside the section.  A new section (priority) loads
        -- loop_cnt with the upper bits of sect_len and is immediately the
        -- last loop if those bits are zero; every issued burst decrements the
        -- counter and flags the last loop when the counter is about to
        -- reach zero.
        process (clk)
        begin
            if (clk'event and clk = '1') then
                if (reset = '1') then
                    first_loop <= false;
                    last_loop <= false;
                    loop_cnt <= (others => '0');
                elsif clk_en = '1' then
                    if next_sect then
                        first_loop <= true;
                        last_loop <= (sect_len(11 - ADDR_ALIGN downto NUM_BEAT_WIDTH) = 0);
                        loop_cnt <= sect_len(11 - ADDR_ALIGN downto NUM_BEAT_WIDTH);
                    elsif next_loop then
                        first_loop <= false;
                        last_loop <= (loop_cnt = 1);
                        loop_cnt <= loop_cnt - 1;
                    end if;
                end if;
            end if;
        end process;

        -- First burst of a section starts at the section address; later ones
        -- follow the registered burst by (length + 1) beats, converted to
        -- bytes by appending ADDR_ALIGN zero bits.
        burst_addr_next <= sect_addr_buf when first_loop else (burst_addr + (burst_len_plus1 & (ADDR_ALIGN-1 downto 0 => '0')));

        -- Length of the upcoming burst: the low NUM_BEAT_WIDTH bits of the
        -- section length for the last burst of a section, otherwise
        -- 2**NUM_BEAT_WIDTH-1; always zero when NUM_BEAT_WIDTH = 0.
        burst_len_next  <= (others => '0')                                    when (NUM_BEAT_WIDTH = 0) else
                           RESIZE(sect_len_buf(NUM_BEAT_WIDTH-1 downto 0), 8) when last_loop else
                           TO_UNSIGNED(2**NUM_BEAT_WIDTH-1, 8);

        -- Burst output registers, loaded when a burst is issued.
        process (clk)
        begin
            if (clk'event and clk = '1') then
                if (reset = '1') then
                    burst_id   <= (others => '0');
                    burst_addr <= (others => '0');
                    burst_len  <= (others => '0');
                    burst_len_plus1 <= (others => '0');
                elsif clk_en = '1' then
                    if next_loop then
                        burst_id   <= req_id_buf;
                        burst_addr <= burst_addr_next;
                        burst_len  <= burst_len_next;
                        burst_len_plus1 <= ('0' & burst_len_next) + 1;
                    end if;
                end if;
            end if;
        end process;
    end generate could_multi_bursts;
end architecture behave;


library IEEE;
use IEEE.STD_LOGIC_1164.all;
use IEEE.NUMERIC_STD.all;

-- Write-path throttle of the s2mm_gmem AXI master.
-- Takes burst descriptors (ID / address / length) and write-data beats,
-- forwards the address request on out_BUS_AW* through a register slice and
-- the data beats on out_BUS_W* through an output register, raising
-- out_BUS_WLAST together with the beat whose index equals the burst length.
entity s2mm_gmem_m_axi_throttle is
    generic (
        -- 0 selects the "aggressive_gen" datapath of architecture behav,
        -- any other value selects "conservative_gen"
        CONSERVATIVE      : INTEGER := 0;
        -- width of every *ID port
        ID_WIDTH          : INTEGER := 1;
        -- width of the *AWADDR ports
        ADDR_WIDTH        : INTEGER := 32;
        -- width of the *WDATA ports; *WSTRB ports are DATA_WIDTH/8 wide
        DATA_WIDTH        : INTEGER := 32;
        -- not referenced by architecture behav
        NUM_OUTSTANDING   : INTEGER := 16;
        -- depth of the address FIFO for ID 0 (conservative mode only)
        ID0_NUM_OUTSTANDING : INTEGER := 2;
        -- number of per-ID address FIFOs generated in conservative mode
        NUM_PORTS         : INTEGER := 1);
    port (
        -- clock, synchronous active-high reset, clock enable
        clk               : in  STD_LOGIC;
        reset             : in  STD_LOGIC;
        clk_en            : in  STD_LOGIC;
        -- incoming burst address request (valid/ready handshake)
        in_BURST_AWID     : in  UNSIGNED(ID_WIDTH-1 downto 0);
        in_BURST_AWADDR   : in  UNSIGNED(ADDR_WIDTH-1 downto 0);
        in_BURST_AWLEN    : in  UNSIGNED(7 downto 0);
        in_BURST_AWVALID  : in  STD_LOGIC;
        out_BURST_AWREADY : out STD_LOGIC;
        -- incoming burst data descriptor (ID / length); only consumed in
        -- conservative mode, out_BURST_WREADY is tied to '0' in aggressive mode
        in_BURST_WID      : in  UNSIGNED(ID_WIDTH-1 downto 0);
        in_BURST_WLEN     : in  UNSIGNED(7 downto 0);
        in_BURST_WVALID   : in  STD_LOGIC;
        out_BURST_WREADY  : out STD_LOGIC;
        -- incoming write-data beats; out_AXI_WID reports the ID of the burst
        -- currently being served
        out_AXI_WID       : out UNSIGNED(ID_WIDTH-1 downto 0);
        in_AXI_WDATA      : in  UNSIGNED(DATA_WIDTH-1 downto 0);
        in_AXI_WSTRB      : in  UNSIGNED(DATA_WIDTH/8-1 downto 0);
        in_AXI_WVALID     : in  STD_LOGIC;
        out_AXI_WREADY    : out STD_LOGIC;
        -- outgoing address request (output of the rs_req register slice)
        out_BUS_AWID      : out UNSIGNED(ID_WIDTH-1 downto 0);
        out_BUS_AWADDR    : out UNSIGNED(ADDR_WIDTH-1 downto 0);
        out_BUS_AWLEN     : out UNSIGNED(7 downto 0);
        out_BUS_AWVALID   : out STD_LOGIC;
        in_BUS_AWREADY    : in  STD_LOGIC;
        -- outgoing write-data beat (registered)
        out_BUS_WID       : out UNSIGNED(ID_WIDTH-1 downto 0);
        out_BUS_WDATA     : out UNSIGNED(DATA_WIDTH-1 downto 0);
        out_BUS_WSTRB     : out UNSIGNED(DATA_WIDTH/8-1 downto 0);
        out_BUS_WLAST     : out STD_LOGIC;
        out_BUS_WVALID    : out STD_LOGIC;
        in_BUS_WREADY     : in  STD_LOGIC);

end entity s2mm_gmem_m_axi_throttle;

-- Implementation of the write-path throttle.
-- A burst descriptor (ID, length, address) is obtained either directly from
-- the in_BURST_AW* ports (aggressive_gen) or from per-ID address FIFOs plus
-- the in_BURST_W* ports (conservative_gen). The address request is pushed
-- into the rs_req register slice once per burst, while data beats are copied
-- into an output register and counted against the burst length.
architecture behav of s2mm_gmem_m_axi_throttle is

    -- Returns the smallest n with 2**n >= x (0 when x <= 1).
    function log2 (x : INTEGER) return INTEGER is
        variable n, m : INTEGER;
    begin
        n := 0;
        m := 1;
        while m < x loop
            n := n + 1;
            m := m * 2;
        end loop;
        return n;
    end function log2;

    -- Returns a NUM_PORTS-wide vector that is all '0' except bit "idx",
    -- which is set to "valid".
    -- NOTE(review): TO_INTEGER(idx) is not range-checked against NUM_PORTS
    -- here; presumably callers only present IDs below NUM_PORTS - confirm.
    function bit_set (idx : UNSIGNED(ID_WIDTH-1 downto 0); valid : STD_LOGIC) 
    return UNSIGNED is
        variable ret : UNSIGNED(NUM_PORTS-1 downto 0);
    begin
        ret := (others=>'0');
        ret(TO_INTEGER(idx)) := valid;
        return ret;
    end function bit_set;

    -- Returns the address-FIFO depth for port "idx": ID0_NUM_OUTSTANDING for
    -- port 0 and 0 for every other port.
    function num_outstanding_val (idx : INTEGER)
    return INTEGER is
        variable ret : INTEGER;
    begin
        case (idx) is
            when 0 => ret := ID0_NUM_OUTSTANDING;
            when others => ret := 0;
        end case;
        return ret;
    end function num_outstanding_val;

    -- Returns the logical OR of all bits of "a".
    function or_reduce(a : UNSIGNED(NUM_PORTS-1 downto 0)) return STD_LOGIC is
        variable ret : STD_LOGIC := '0';
    begin
        for i in a'range loop
            ret := ret or a(i);
        end loop;
        return ret;
    end function or_reduce;

    -- component: valid/ready register slice (note: it has no clk_en port)
    component s2mm_gmem_m_axi_reg_slice is
        generic (
            DATA_WIDTH  : integer := 8);
        port (
            clk         : in  std_logic;
            reset       : in  std_logic;
            s_data      : in  UNSIGNED(DATA_WIDTH-1 downto 0);
            s_valid     : in  std_logic;
            s_ready     : out std_logic;
            m_data      : out UNSIGNED(DATA_WIDTH-1 downto 0);
            m_valid     : out std_logic;
            m_ready     : in  std_logic);
    end component s2mm_gmem_m_axi_reg_slice;

    -- component: FIFO used for the per-ID address queues in conservative mode
    component s2mm_gmem_m_axi_fifo is
        generic (
            MEM_STYLE         : STRING  := "shiftreg";
            DATA_WIDTH        : INTEGER := 8;
            ADDR_WIDTH        : INTEGER := 4;
            DEPTH             : INTEGER := 16);
        port (
            clk               : in  STD_LOGIC;
            reset             : in  STD_LOGIC;
            clk_en            : in  STD_LOGIC;
            if_full_n         : out STD_LOGIC;
            if_write          : in  STD_LOGIC;
            if_din            : in  UNSIGNED(DATA_WIDTH-1 downto 0);
            if_empty_n        : out STD_LOGIC;
            if_read           : in  STD_LOGIC;
            if_dout           : out UNSIGNED(DATA_WIDTH-1 downto 0);
            if_num_data_valid : out UNSIGNED(ADDR_WIDTH downto 0));
    end component s2mm_gmem_m_axi_fifo;

    -- local signal
    -- packed {ID, LEN, ADDR} request entering / leaving the rs_req slice
    signal rs_req_pack_in      : UNSIGNED(ID_WIDTH + ADDR_WIDTH + 7 downto 0);
    signal rs_req_pack_out     : UNSIGNED(ID_WIDTH + ADDR_WIDTH + 7 downto 0);

    -- internal copies of the W-channel outputs (read back inside the logic)
    signal local_AXI_WREADY    : STD_LOGIC;
    signal local_BUS_WID       : UNSIGNED(ID_WIDTH-1 downto 0);
    signal local_BUS_WDATA     : UNSIGNED(DATA_WIDTH-1 downto 0);
    signal local_BUS_WSTRB     : UNSIGNED(DATA_WIDTH/8-1 downto 0);
    signal local_BUS_WLAST     : STD_LOGIC;
    signal local_BUS_WVALID    : STD_LOGIC;

    -- address request presented to rs_req
    signal local_BURST_AWID    : UNSIGNED(ID_WIDTH-1 downto 0);
    signal local_BURST_AWADDR  : UNSIGNED(ADDR_WIDTH-1 downto 0);
    signal local_BURST_AWLEN   : UNSIGNED(7 downto 0);
    signal local_BURST_AWVALID : STD_LOGIC;
    signal local_BURST_AWREADY : STD_LOGIC;

    -- descriptor of the burst whose data beats are currently being accepted
    signal local_BURST_WID     : UNSIGNED(ID_WIDTH-1 downto 0);
    signal local_BURST_WLEN    : UNSIGNED(7 downto 0);
    signal local_BURST_WVALID  : STD_LOGIC;
    signal local_BURST_WREADY  : STD_LOGIC;

    signal burst_handling      : STD_LOGIC;
    signal ready_for_burst     : STD_LOGIC;
    signal ready_for_beat      : STD_LOGIC;
    signal next_burst          : STD_LOGIC;
    signal next_beat           : STD_LOGIC;
    -- index of the next beat within the current burst (0-based)
    signal num_beat_cnt        : UNSIGNED(7 downto 0);
begin

    -- Register slice on the outgoing address request.
    rs_req : s2mm_gmem_m_axi_reg_slice
    generic map (
        DATA_WIDTH => ID_WIDTH + ADDR_WIDTH + 8)
    port map (
        clk        => clk,
        reset      => reset,
        s_data     => rs_req_pack_in,
        s_valid    => local_BURST_AWVALID,
        s_ready    => local_BURST_AWREADY,
        m_data     => rs_req_pack_out,
        m_valid    => out_BUS_AWVALID,
        m_ready    => in_BUS_AWREADY);

    -- Packing order (MSB to LSB): ID, LEN (8 bits), ADDR.
    rs_req_pack_in <= local_BURST_AWID & local_BURST_AWLEN & local_BURST_AWADDR;
    out_BUS_AWID   <= rs_req_pack_out(ID_WIDTH+ADDR_WIDTH+7 downto ADDR_WIDTH+8);
    out_BUS_AWLEN  <= rs_req_pack_out(ADDR_WIDTH+7 downto ADDR_WIDTH);
    out_BUS_AWADDR <= rs_req_pack_out(ADDR_WIDTH-1 downto 0);

    -- burst_handling is set while a burst descriptor is valid, the burst does
    -- not finish in this cycle and ready_for_burst is high. Because
    -- ready_for_burst includes burst_handling, the flag then stays set until
    -- local_BURST_WREADY goes high; while set it masks local_BURST_AWVALID so
    -- the address request is not presented to rs_req again.
    process (clk)
    begin
        if (clk'event and clk = '1') then
            if reset = '1' then
                burst_handling <= '0';
            elsif clk_en = '1' then
                if (local_BURST_WVALID = '1' and local_BURST_WREADY = '0' and ready_for_burst = '1') then
                    burst_handling <= '1';
                else
                    burst_handling <= '0';
                end if;
            end if;
        end if;
    end process;

    -- Aggressive mode: the whole burst descriptor (ID, LEN, ADDR) is taken
    -- from the in_BURST_AW* ports through one register slice; the
    -- in_BURST_W* ports are not used and out_BURST_WREADY is tied low.
    aggressive_gen : if (CONSERVATIVE = 0) generate
        signal rs_burst_pack_in  : UNSIGNED(ID_WIDTH + ADDR_WIDTH + 7 downto 0);
        signal rs_burst_pack_out : UNSIGNED(ID_WIDTH + ADDR_WIDTH + 7 downto 0);
    begin
        rs_burst : s2mm_gmem_m_axi_reg_slice
        generic map (
            DATA_WIDTH => ID_WIDTH + ADDR_WIDTH + 8)
        port map (
            clk        => clk,
            reset      => reset,
            s_data     => rs_burst_pack_in,
            s_valid    => in_BURST_AWVALID,
            s_ready    => out_BURST_AWREADY,
            m_data     => rs_burst_pack_out,
            m_valid    => local_BURST_WVALID,
            m_ready    => local_BURST_WREADY);

        -- Same {ID, LEN, ADDR} packing as rs_req.
        rs_burst_pack_in    <= in_BURST_AWID & in_BURST_AWLEN & in_BURST_AWADDR;
        local_BURST_WID     <= rs_burst_pack_out(ADDR_WIDTH+ID_WIDTH+7 downto ADDR_WIDTH+8);
        local_BURST_WLEN    <= rs_burst_pack_out(ADDR_WIDTH+7 downto ADDR_WIDTH);
        local_BURST_AWADDR  <= rs_burst_pack_out(ADDR_WIDTH-1 downto 0);

        out_BURST_WREADY    <= '0';
        local_BURST_AWID    <= local_BURST_WID;
        local_BURST_AWLEN   <= local_BURST_WLEN;
        -- request is offered to rs_req only until burst_handling is set
        local_BURST_AWVALID <= local_BURST_WVALID AND (not burst_handling);
    end generate aggressive_gen;

    -- Conservative mode: addresses from in_BURST_AW* are queued in one FIFO
    -- per port; the {ID, LEN} descriptor arrives separately on in_BURST_W*.
    -- The address request is issued only when both the descriptor and an
    -- address in the FIFO selected by the descriptor ID are available.
    conservative_gen : if (CONSERVATIVE /= 0) generate
        type ADDR_ARRAY is array (0 to NUM_PORTS-1) of UNSIGNED(ADDR_WIDTH-1 downto 0);
        signal burst_addr        : ADDR_ARRAY;
        signal burst_ready       : UNSIGNED(NUM_PORTS-1 downto 0);
        signal burst_valid       : UNSIGNED(NUM_PORTS-1 downto 0);
        signal burst_write       : UNSIGNED(NUM_PORTS-1 downto 0);
        signal burst_read        : UNSIGNED(NUM_PORTS-1 downto 0);
        signal rs_burst_pack_in  : UNSIGNED(ID_WIDTH+7 downto 0);
        signal rs_burst_pack_out : UNSIGNED(ID_WIDTH+7 downto 0);
    begin
        -- One address FIFO per port; depth comes from num_outstanding_val.
        -- MEM_STYLE is left at the component default.
        fifo_req_gen : for idx in 0 to NUM_PORTS-1 generate
            fifo_req : s2mm_gmem_m_axi_fifo
            generic map (
                DATA_WIDTH        => ADDR_WIDTH,
                ADDR_WIDTH        => log2(num_outstanding_val(idx)),
                DEPTH             => num_outstanding_val(idx))
            port map (
                clk               => clk,
                reset             => reset,
                clk_en            => clk_en,
                if_full_n         => burst_ready(idx),
                if_write          => burst_write(idx),
                if_din            => in_BURST_AWADDR,
                if_empty_n        => burst_valid(idx),
                if_read           => burst_read(idx),
                if_dout           => burst_addr(idx),
                if_num_data_valid => open);
        end generate fifo_req_gen;

        -- Register slice on the {ID, LEN} data descriptor.
        rs_burst : s2mm_gmem_m_axi_reg_slice
        generic map (
            DATA_WIDTH      =>  ID_WIDTH + 8)
        port map (
            clk             =>  clk,
            reset           =>  reset,
            s_data          =>  rs_burst_pack_in,
            s_valid         =>  in_BURST_WVALID,
            s_ready         =>  out_BURST_WREADY,
            m_data          =>  rs_burst_pack_out,
            m_valid         =>  local_BURST_WVALID,
            m_ready         =>  local_BURST_WREADY);

        rs_burst_pack_in    <= in_BURST_WID & in_BURST_WLEN;
        local_BURST_WID     <= rs_burst_pack_out(ID_WIDTH+7 downto 8);
        local_BURST_WLEN    <= rs_burst_pack_out(7 downto 0);

        -- ready is reported when any of the address FIFOs is not full
        out_BURST_AWREADY   <= or_reduce(burst_ready);
        local_BURST_AWID    <= local_BURST_WID;
        local_BURST_AWLEN   <= local_BURST_WLEN;
        -- address comes from the FIFO selected by the descriptor ID
        local_BURST_AWADDR  <= burst_addr(TO_INTEGER(local_BURST_WID));
        local_BURST_AWVALID <= burst_valid(TO_INTEGER(local_BURST_WID)) AND local_BURST_WVALID AND (not burst_handling);

        -- push into the FIFO selected by in_BURST_AWID on in_BURST_AWVALID
        burst_write         <= bit_set(in_BURST_AWID, in_BURST_AWVALID);
        -- pop the FIFO of the current ID when its burst descriptor is consumed
        burst_read          <= bit_set(local_BURST_WID, local_BURST_WVALID AND local_BURST_WREADY);
    end generate conservative_gen;

    -- W channel: drive the ports from the internal copies
    out_AXI_WID      <= local_BURST_WID;
    out_AXI_WREADY   <= local_AXI_WREADY;
    out_BUS_WID      <= local_BUS_WID;
    out_BUS_WDATA    <= local_BUS_WDATA;
    out_BUS_WSTRB    <= local_BUS_WSTRB;
    out_BUS_WLAST    <= local_BUS_WLAST;
    out_BUS_WVALID   <= local_BUS_WVALID;

    -- A beat is accepted only while a burst descriptor is valid, the address
    -- side is ready for it and the output register can take a new beat.
    local_AXI_WREADY   <= local_BURST_WVALID and ready_for_burst and ready_for_beat;
    -- The descriptor is consumed together with the last beat of its burst.
    local_BURST_WREADY <= next_burst and ready_for_burst;

    -- address request already handed over, or rs_req can accept it now
    ready_for_burst  <= burst_handling or local_BURST_AWREADY;
    -- output register is empty or is being drained in this cycle
    ready_for_beat   <= (not local_BUS_WVALID or in_BUS_WREADY);
    -- the beat accepted in this cycle is the one whose index equals WLEN
    next_burst       <= '1' when (num_beat_cnt = local_BURST_WLEN) and next_beat = '1' else '0';
    next_beat        <= in_AXI_WVALID and local_AXI_WREADY;

    -- Output data register: captures ID, data and strobes of each accepted beat.
    process (clk)
    begin
        if (clk'event and clk = '1') then
            if (reset = '1') then
                local_BUS_WID   <= (others => '0');
                local_BUS_WDATA <= (others => '0');
                local_BUS_WSTRB <= (others => '0');
            elsif clk_en = '1' then
                if next_beat = '1' then
                    local_BUS_WID   <= local_BURST_WID;
                    local_BUS_WDATA <= in_AXI_WDATA;
                    local_BUS_WSTRB <= in_AXI_WSTRB;
                end if;
            end if;
        end if;
    end process;

    -- Output valid: set by an accepted beat, otherwise cleared once the
    -- register is free / drained (ready_for_beat).
    process (clk)
    begin
        if (clk'event and clk = '1') then
            if (reset = '1') then
                local_BUS_WVALID <= '0';
            elsif clk_en = '1' then
                if next_beat = '1' then
                    local_BUS_WVALID <= '1';
                elsif ready_for_beat = '1' then
                    local_BUS_WVALID <= '0';
                end if;
            end if;
        end if;
    end process;

    -- Output last flag: set together with the final beat of a burst,
    -- otherwise cleared when ready_for_beat is high.
    process (clk)
    begin
        if (clk'event and clk = '1') then
            if (reset = '1') then
                local_BUS_WLAST <= '0';
            elsif clk_en = '1' then
                if next_burst = '1' then
                    local_BUS_WLAST <= '1';
                elsif ready_for_beat = '1' then
                    local_BUS_WLAST <= '0';
                end if;
            end if;
        end if;
    end process;

    -- Beat counter: restarts at 0 after the last beat of a burst, otherwise
    -- advances by one for every accepted beat.
    process (clk)
    begin
        if (clk'event and clk = '1') then
            if (reset = '1') then
                num_beat_cnt <= (others => '0');
            elsif clk_en = '1' then
                if next_burst = '1' then
                    num_beat_cnt <= (others => '0');
                elsif next_beat = '1' then
                    num_beat_cnt <= num_beat_cnt + 1;
                end if;
            end if;
        end if;
    end process;

end architecture behav;


library IEEE;
use IEEE.STD_LOGIC_1164.all;
use IEEE.NUMERIC_STD.all;

-- Valid/ready register slice: buffers words travelling from the slave side
-- (s_*) to the master side (m_*). Both m_data and s_ready are driven from
-- registers in architecture behave.
entity s2mm_gmem_m_axi_reg_slice is
    generic (
        -- width in bits of the buffered data word
        DATA_WIDTH  : INTEGER := 8);
    port (
        -- system signals: clock and synchronous, active-high reset
        -- (there is no clock-enable input)
        clk        : in  STD_LOGIC;
        reset       : in  STD_LOGIC;
        -- slave side: a word is taken when s_valid and s_ready are both '1'
        s_data      : in  UNSIGNED(DATA_WIDTH-1 downto 0);
        s_valid     : in  STD_LOGIC;
        s_ready     : out STD_LOGIC;
        -- master side: m_data is presented while m_valid is '1'
        m_data      : out UNSIGNED(DATA_WIDTH-1 downto 0);
        m_valid     : out STD_LOGIC;
        m_ready     : in  STD_LOGIC);
end entity s2mm_gmem_m_axi_reg_slice;

-- Two-entry register slice: an output register (out_reg) backed by a second
-- register (skid_reg) that catches a word arriving while the output register
-- is occupied and the master side is stalled.
architecture behave of s2mm_gmem_m_axi_reg_slice is
    -- State encoding; bit 0 of the state doubles as m_valid.
    constant ST_EMPTY : UNSIGNED(1 downto 0) := "10";  -- no word stored
    constant ST_ONE   : UNSIGNED(1 downto 0) := "11";  -- out_reg holds a word
    constant ST_TWO   : UNSIGNED(1 downto 0) := "01";  -- out_reg and skid_reg hold words

    signal out_reg       : UNSIGNED(DATA_WIDTH-1 downto 0);
    signal skid_reg      : UNSIGNED(DATA_WIDTH-1 downto 0);
    signal out_load      : STD_LOGIC;
    signal skid_load     : STD_LOGIC;
    signal out_from_skid : STD_LOGIC;
    signal ready_r       : STD_LOGIC;
    signal cur_st        : UNSIGNED(1 downto 0);
    signal nxt_st        : UNSIGNED(1 downto 0);
begin
    m_valid <= cur_st(0);
    m_data  <= out_reg;
    s_ready <= ready_r;

    -- skid_reg follows every word accepted on the slave side
    skid_load     <= s_valid and ready_r;
    -- when both registers are occupied, out_reg is refilled from skid_reg
    out_from_skid <= '1' when cur_st = ST_TWO else '0';

    -- Decide whether out_reg is (re)loaded in this cycle.
    out_load_proc : process (cur_st, s_valid, m_ready)
    begin
        out_load <= '0';
        case cur_st is
            when ST_EMPTY =>
                if s_valid = '1' then
                    out_load <= '1';
                end if;
            when ST_ONE =>
                if (s_valid = '1' and m_ready = '1') then
                    out_load <= '1';
                end if;
            when ST_TWO =>
                if m_ready = '1' then
                    out_load <= '1';
                end if;
            when others =>
                null;
        end case;
    end process out_load_proc;

    -- Data registers (not reset).
    data_regs_proc : process (clk)
    begin
        if (clk'event and clk = '1') then
            if (out_load = '1') then
                if (out_from_skid = '1') then
                    out_reg <= skid_reg;
                else
                    out_reg <= s_data;
                end if;
            end if;
            if (skid_load = '1') then
                skid_reg <= s_data;
            end if;
        end if;
    end process data_regs_proc;

    -- State register and registered slave-side ready.
    ctrl_regs_proc : process (clk)
    begin
        if (clk'event and clk = '1') then
            if (reset = '1') then
                cur_st  <= ST_EMPTY;
                ready_r <= '0';
            else
                cur_st <= nxt_st;
                case cur_st is
                    when ST_EMPTY =>
                        ready_r <= '1';
                    when ST_ONE =>
                        -- both registers become occupied: stop accepting
                        if (nxt_st = ST_TWO) then
                            ready_r <= '0';
                        end if;
                    when ST_TWO =>
                        -- one register frees up: accept again
                        if (nxt_st = ST_ONE) then
                            ready_r <= '1';
                        end if;
                    when others =>
                        null;
                end case;
            end if;
        end if;
    end process ctrl_regs_proc;

    -- Next-state logic.
    nxt_st_proc : process (cur_st, s_valid, ready_r, m_ready)
    begin
        -- default also covers the unused encoding
        nxt_st <= ST_EMPTY;
        case cur_st is
            when ST_EMPTY =>
                if (s_valid = '1' and ready_r = '1') then
                    nxt_st <= ST_ONE;
                end if;
            when ST_ONE =>
                if (s_valid = '1' and m_ready = '0') then
                    nxt_st <= ST_TWO;
                elsif not (s_valid = '0' and m_ready = '1') then
                    nxt_st <= ST_ONE;
                end if;
            when ST_TWO =>
                nxt_st <= ST_TWO;
                if (m_ready = '1') then
                    nxt_st <= ST_ONE;
                end if;
            when others =>
                null;
        end case;
    end process nxt_st_proc;

end architecture behave;


library IEEE;
use IEEE.STD_LOGIC_1164.all;
use IEEE.NUMERIC_STD.all;

-- Synchronous FIFO with full_n / empty_n style handshakes.
-- Architecture behave selects one of three structures from DEPTH:
-- 0 (combinational pass-through), 1 (single register) or > 1 (storage
-- component plus registered output).
entity s2mm_gmem_m_axi_fifo is
    generic (
        -- "shiftreg" selects the s2mm_gmem_m_axi_srl storage, any other
        -- value selects s2mm_gmem_m_axi_mem and is forwarded to it
        -- (only relevant for DEPTH > 1)
        MEM_STYLE         : STRING  := "shiftreg";
        -- width in bits of one FIFO entry
        DATA_WIDTH        : INTEGER := 8;
        -- width of the storage address; if_num_data_valid is ADDR_WIDTH+1 bits
        ADDR_WIDTH        : INTEGER := 4;
        -- FIFO depth, see the structure selection described above
        DEPTH             : INTEGER := 16);
    port (
        -- clock, synchronous active-high reset, clock enable
        clk               : in  STD_LOGIC;
        reset             : in  STD_LOGIC;
        clk_en            : in  STD_LOGIC;
        -- write side: if_din is stored when if_write and if_full_n are '1'
        if_full_n         : out STD_LOGIC;
        if_write          : in  STD_LOGIC;
        if_din            : in  UNSIGNED(DATA_WIDTH-1 downto 0);
        -- read side: if_dout is valid while if_empty_n is '1' and is
        -- released with if_read
        if_empty_n        : out STD_LOGIC;
        if_read           : in  STD_LOGIC;
        if_dout           : out UNSIGNED(DATA_WIDTH-1 downto 0);
        -- occupancy as reported by the selected structure
        if_num_data_valid : out UNSIGNED(ADDR_WIDTH downto 0));
end entity s2mm_gmem_m_axi_fifo;

-- FIFO implementation. Exactly one of the three generate blocks below is
-- elaborated, depending on DEPTH:
--   DEPTH = 0 : combinational pass-through, nothing is stored
--   DEPTH = 1 : a single data register with a valid flag
--   DEPTH > 1 : a storage component (shift register or memory) followed by
--               its registered output, which acts as the FIFO head
architecture behave of s2mm_gmem_m_axi_fifo is
    signal push       : std_logic;         -- a word is written this cycle
    signal pop        : std_logic;         -- a word leaves the storage this cycle
    signal full_n     : std_logic := '1';
    signal empty_n    : std_logic := '0';  -- storage (not output stage) holds data
    signal dout_vld   : std_logic := '0';  -- output stage holds a valid word
    -- The former architecture-level "mOutPtr" signal was removed: it was
    -- never driven or read and was shadowed by the mOutPtr declared inside
    -- fifo_depth_gt1_gen.
begin

    -- DEPTH = 0: write and read sides are wired straight through.
    fifo_depth_0_gen : if (DEPTH = 0) generate
    begin
        if_num_data_valid <= TO_UNSIGNED(1, ADDR_WIDTH+1) when if_write = '1' else (others => '0');
        if_full_n         <= if_read;
        if_empty_n        <= if_write;
        if_dout           <= if_din;
    end generate fifo_depth_0_gen;

    -- DEPTH = 1: one register; a write is only accepted while it is empty.
    fifo_depth_1_gen : if (DEPTH = 1) generate
        signal dout_reg : UNSIGNED(DATA_WIDTH-1 downto 0);
    begin
        if_num_data_valid <= TO_UNSIGNED(1, ADDR_WIDTH+1) when dout_vld = '1' else (others => '0');
        if_full_n         <= not dout_vld;
        if_empty_n        <= dout_vld;
        if_dout           <= dout_reg;
        push              <= not dout_vld and if_write;
        pop               <= dout_vld and if_read;

        -- data register
        process (clk) begin
            if clk'event and clk = '1' then
                if reset = '1' then
                    dout_reg <= (others => '0');
                elsif clk_en = '1' then
                    if push = '1' then
                        dout_reg <= if_din;
                    end if;
                end if;
            end if;
        end process;

        -- valid flag of the data register
        process (clk) begin
            if clk'event and clk = '1' then
                if reset = '1' then
                    dout_vld <= '0';
                elsif clk_en = '1' then
                    if push = '1' then
                        dout_vld <= '1';
                    elsif pop = '1' then
                        dout_vld <= '0';
                    end if;
                end if;
            end if;
        end process;
    end generate fifo_depth_1_gen;

    -- DEPTH > 1: storage component plus registered output stage.
    fifo_depth_gt1_gen : if (DEPTH > 1) generate
        -- number of words held in storage and output stage together
        signal num_data_cnt : UNSIGNED(ADDR_WIDTH downto 0) := (others => '0');
        -- number of words held in the storage component only
        signal mOutPtr      : UNSIGNED(ADDR_WIDTH downto 0) := (others => '0');
        -- the word in the output stage is consumed this cycle
        signal pop_dout     : STD_LOGIC;
    begin
        if_num_data_valid <= num_data_cnt when dout_vld = '1' else (others=> '0');
        if_full_n         <= full_n;
        if_empty_n        <= dout_vld;
        push              <= full_n and if_write;
        -- move a word from storage to the output stage whenever the output
        -- stage is empty or is being read
        pop               <= empty_n and (if_read or not dout_vld);
        pop_dout          <= dout_vld and if_read;

        -- mOutPtr
        process (clk) begin
            if clk'event and clk = '1' then
                if reset = '1' then
                    mOutPtr <= (others => '0');
                elsif (clk_en = '1') then
                    if push = '1' and pop = '0' then
                        mOutPtr <= mOutPtr + 1;
                    elsif push = '0' and pop = '1' then
                        mOutPtr <= mOutPtr - 1;
                    end if;
                end if;
            end if;
        end process;

        -- num_data_cnt
        process (clk) begin
            if clk'event and clk = '1' then
                if reset = '1' then
                    num_data_cnt <= (others => '0');
                elsif (clk_en = '1') then
                    if push = '1' and pop_dout = '0' then
                        num_data_cnt <= num_data_cnt + 1;
                    elsif push = '0' and pop_dout = '1' then
                        num_data_cnt <= num_data_cnt - 1;
                    end if;
                end if;
            end if;
        end process;

        -- full_n: cleared by the write that brings the total to DEPTH,
        -- set again by a read without a simultaneous write
        process (clk) begin
            if clk'event and clk = '1' then
                if reset = '1' then
                    full_n <= '1';
                elsif clk_en = '1' then
                    if push = '1' and pop_dout = '0' and (num_data_cnt = DEPTH - 1) then
                        full_n <= '0';
                    elsif push = '0' and pop_dout = '1' then
                        full_n <= '1';
                    end if;
                end if;
            end if;
        end process;

        -- empty_n: cleared when the last word leaves the storage
        process (clk) begin
            if clk'event and clk = '1' then
                if reset = '1' then
                    empty_n <= '0';
                elsif clk_en = '1' then
                    if push = '1' and pop = '0' then
                        empty_n <= '1';
                    elsif push = '0' and pop = '1' and (mOutPtr = 1) then
                        empty_n <= '0';
                    end if;
                end if;
            end if;
        end process;

        -- dout_vld: set when a word is moved to the output stage, cleared
        -- when that word is read and no new one follows
        process (clk) begin
            if clk'event and clk = '1' then
                if reset = '1' then
                    dout_vld <= '0';
                elsif clk_en = '1' then
                    if pop = '1' then
                        dout_vld <= '1';
                    elsif pop_dout = '1' then
                        dout_vld <= '0';
                    end if;
                end if;
            end if;
        end process;

        -- shiftreg based fifo
        fifo_srl_gen: if (MEM_STYLE = "shiftreg") generate
            component s2mm_gmem_m_axi_srl is
                generic (
                    DATA_WIDTH  : integer := 32;
                    ADDR_WIDTH  : integer := 6;
                    DEPTH       : integer := 64);
                port (
                    clk         : in  std_logic;
                    reset       : in  std_logic;
                    clk_en      : in  std_logic;
                    we          : in  std_logic;
                    din         : in  UNSIGNED(DATA_WIDTH-1 downto 0);
                    raddr       : in  UNSIGNED(ADDR_WIDTH-1 downto 0);
                    re          : in  std_logic;
                    dout        : out UNSIGNED(DATA_WIDTH-1 downto 0));
            end component;
            -- index of the shift-register stage that is read next
            signal raddr      : UNSIGNED(ADDR_WIDTH - 1 downto 0) := (others => '0');
        begin
            U_ffo_srl: s2mm_gmem_m_axi_srl
            generic map (
                DATA_WIDTH  => DATA_WIDTH,
                ADDR_WIDTH  => ADDR_WIDTH,
                DEPTH       => DEPTH)
            port map (
                clk         => clk,
                reset       => reset,
                clk_en      => clk_en,
                we          => push,
                din         => if_din,
                re          => pop,
                raddr       => raddr,
                dout        => if_dout);

            -- The read index grows when a word is added to a non-empty
            -- storage and shrinks (never below 0) when a word is removed
            -- without a simultaneous write.
            process (clk) begin
                if clk'event and clk = '1' then
                    if reset = '1' then
                        raddr <= (others => '0');
                    elsif (clk_en = '1') then
                        if (push = '1' and pop = '0' and empty_n = '1') then
                            raddr <= raddr + 1;
                        elsif (push = '0' and pop = '1' and (raddr /= 0)) then
                            raddr <= raddr - 1;
                        end if;
                    end if;
                end if;
            end process;
        end generate fifo_srl_gen;

        -- mem based fifo
        fifo_mem_gen: if (MEM_STYLE /= "shiftreg") generate
            component s2mm_gmem_m_axi_mem is
                generic (
                    MEM_STYLE   : string  := "auto";
                    DATA_WIDTH  : integer := 32;
                    ADDR_WIDTH  : integer := 6;
                    DEPTH       : integer := 64);
                port (
                    clk         : in  std_logic;
                    reset       : in  std_logic;
                    clk_en      : in  std_logic;
                    we          : in  std_logic;
                    waddr       : in  UNSIGNED(ADDR_WIDTH-1 downto 0);
                    din         : in  UNSIGNED(DATA_WIDTH-1 downto 0);
                    re          : in  std_logic;
                    raddr       : in  UNSIGNED(ADDR_WIDTH-1 downto 0);
                    dout        : out UNSIGNED(DATA_WIDTH-1 downto 0));
            end component;

            -- circular write / read pointers and their next values
            signal waddr      : UNSIGNED(ADDR_WIDTH - 1 downto 0) := (others => '0');
            signal raddr      : UNSIGNED(ADDR_WIDTH - 1 downto 0) := (others => '0');
            signal wnext      : UNSIGNED(ADDR_WIDTH - 1 downto 0);
            signal rnext      : UNSIGNED(ADDR_WIDTH - 1 downto 0);
        begin
            U_ffo_mem: s2mm_gmem_m_axi_mem
            generic map (
                MEM_STYLE   => MEM_STYLE,
                DATA_WIDTH  => DATA_WIDTH,
                ADDR_WIDTH  => ADDR_WIDTH,
                DEPTH       => DEPTH)
            port map (
                clk         => clk,
                reset       => reset,
                clk_en      => clk_en,
                we          => push,
                waddr       => waddr,
                din         => if_din,
                re          => pop,
                raddr       => raddr,
                dout        => if_dout);

            -- Pointers advance on push / pop and wrap to 0 after DEPTH - 2.
            wnext <= waddr           when push = '0'        else
                    (others => '0') when waddr = DEPTH - 2 else
                    waddr + 1;
            rnext <= raddr           when pop = '0'         else
                    (others => '0') when raddr = DEPTH - 2 else
                    raddr + 1;

            process (clk) begin
                if clk'event and clk = '1' then
                    if reset = '1' then
                        waddr <= (others => '0');
                    elsif (clk_en = '1') then
                        waddr <= wnext;
                    end if;
                end if;
            end process;

            process (clk) begin
                if clk'event and clk = '1' then
                    if reset = '1' then
                        raddr <= (others => '0');
                    elsif (clk_en = '1') then
                        raddr <= rnext;
                    end if;
                end if;
            end process;
        end generate fifo_mem_gen;
    end generate fifo_depth_gt1_gen;

end architecture behave;

library IEEE;
use IEEE.STD_LOGIC_1164.all;
use IEEE.NUMERIC_STD.all;

-- Shift-register storage with an addressable, registered read port.
entity s2mm_gmem_m_axi_srl is
    generic (
        -- width in bits of one stored word
        DATA_WIDTH  : integer := 32;
        -- width of the read address
        ADDR_WIDTH  : integer := 6;
        -- architecture behav implements DEPTH-1 shift stages
        DEPTH       : integer := 64);
    port (
        -- clock, synchronous active-high reset (dout only), clock enable
        clk         : in  std_logic;
        reset       : in  std_logic;
        clk_en      : in  std_logic;
        -- write enable: shifts din into stage 0
        we          : in  std_logic;
        din         : in  UNSIGNED(DATA_WIDTH-1 downto 0);
        -- index of the stage copied to dout when re is '1'
        raddr       : in  UNSIGNED(ADDR_WIDTH-1 downto 0);
        re          : in  std_logic;
        dout        : out UNSIGNED(DATA_WIDTH-1 downto 0));
end s2mm_gmem_m_axi_srl;

-- Shift-register storage: DEPTH-1 stages, newest word in stage 0, with a
-- registered read of the stage selected by raddr.
architecture behav of s2mm_gmem_m_axi_srl is
    type SRL_ARRAY is array (0 to DEPTH-2) of UNSIGNED(DATA_WIDTH-1 downto 0);
    signal mem : SRL_ARRAY;
begin

    -- Shift chain (no reset): on an enabled write every stage takes the
    -- value of its predecessor and stage 0 takes din.
    shift_proc : process (clk)
    begin
        if (clk'event and clk = '1') then
            if clk_en = '1' then
                if we = '1' then
                    mem(0) <= din;
                    for stage in DEPTH - 2 downto 1 loop
                        mem(stage) <= mem(stage - 1);
                    end loop;
                end if;
            end if;
        end if;
    end process shift_proc;

    -- Registered read port: cleared by reset, otherwise loaded from the
    -- addressed stage on an enabled read.
    read_proc : process (clk)
    begin
        if (clk'event and clk = '1') then
            if reset = '1' then
                dout <= (others => '0');
            elsif clk_en = '1' then
                if re = '1' then
                    dout <= mem(to_integer(raddr));
                end if;
            end if;
        end if;
    end process read_proc;

end architecture behav;

library IEEE;
use IEEE.STD_LOGIC_1164.all;
use IEEE.NUMERIC_STD.all;

-- Simple dual-port memory: one synchronous write port and one registered
-- read port.
entity s2mm_gmem_m_axi_mem is
    generic (
        -- value applied as the ram_style attribute of the memory array
        MEM_STYLE   : string  := "auto";
        -- width in bits of one stored word
        DATA_WIDTH  : integer := 32;
        -- width of the read and write addresses
        ADDR_WIDTH  : integer := 6;
        -- architecture behav implements DEPTH-1 memory words
        DEPTH       : integer := 64);
    port (
        -- clock, synchronous active-high reset (dout only), clock enable
        clk         : in  std_logic;
        reset       : in  std_logic;
        clk_en      : in  std_logic;
        -- write port: din is stored at waddr when we is '1'
        we          : in  std_logic;
        waddr       : in  UNSIGNED(ADDR_WIDTH-1 downto 0);
        din         : in  UNSIGNED(DATA_WIDTH-1 downto 0);
        -- read port: the word at raddr is copied to dout when re is '1'
        re          : in  std_logic;
        raddr       : in  UNSIGNED(ADDR_WIDTH-1 downto 0);
        dout        : out UNSIGNED(DATA_WIDTH-1 downto 0));
end s2mm_gmem_m_axi_mem;

-- Memory storage: DEPTH-1 words with a synchronous write port and a
-- registered read port; MEM_STYLE is attached to the array as ram_style.
architecture behav of s2mm_gmem_m_axi_mem is
    type MEM_ARRAY is array (0 to DEPTH - 2) of UNSIGNED(DATA_WIDTH - 1 downto 0);
    signal mem : MEM_ARRAY;
    attribute ram_style: string;
    attribute ram_style of mem: signal is MEM_STYLE;
begin

    -- Write port (no reset): store din at waddr on an enabled write.
    write_proc : process (clk)
    begin
        if clk'event and clk = '1' then
            if clk_en = '1' then
                if we = '1' then
                    mem(to_integer(waddr)) <= din;
                end if;
            end if;
        end if;
    end process write_proc;

    -- Read port: dout is cleared by reset, otherwise loaded from the
    -- addressed word on an enabled read.
    read_proc : process (clk)
    begin
        if clk'event and clk = '1' then
            if reset = '1' then
                dout <= (others => '0');
            elsif clk_en = '1' then
                if re = '1' then
                    dout <= mem(to_integer(raddr));
                end if;
            end if;
        end if;
    end process read_proc;

end architecture behav;
