// ==============================================================
// Generated by Vitis HLS v2025.1
// Copyright 1986-2022 Xilinx, Inc. All Rights Reserved.
// Copyright 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.
// ==============================================================
`timescale 1ns/1ps
`default_nettype none

// Top-level wrapper of the "gmem" AXI master for a write-only configuration.
//
// Structure (all visible in this module):
//   * one s2mm_gmem_m_axi_store instance (store_unit_0) fed by the kernel-side
//     I_CH0_AW* / I_CH0_W* / I_CH0_B* ports;
//   * one s2mm_gmem_m_axi_write instance (bus_write) that drives the external
//     AW / W / B pins;
//   * no read path: NUM_READ_PORTS is 0, every AR output and RREADY is tied
//     low, and the kernel-side read outputs are tied to zero.
//
// The read-side inputs (ARREADY, RID, RDATA, RRESP, RLAST, RUSER, RVALID,
// I_CH0_ARADDR, I_CH0_ARLEN, I_CH0_ARVALID, I_CH0_RREADY) are declared but not
// referenced anywhere in this module.
module s2mm_gmem_m_axi
#(parameter
    // Widths of the external AXI ports.
    C_M_AXI_ID_WIDTH          = 1,
    C_M_AXI_ADDR_WIDTH        = 32,
    C_M_AXI_DATA_WIDTH        = 32, // power of 2 & range: 2 to 1024
    C_M_AXI_AWUSER_WIDTH      = 1,
    C_M_AXI_ARUSER_WIDTH      = 1,
    C_M_AXI_WUSER_WIDTH       = 1,
    C_M_AXI_RUSER_WIDTH       = 1,
    C_M_AXI_BUSER_WIDTH       = 1,
    // Forwarded unchanged to the store unit (C_TARGET_ADDR) or to bus_write
    // (C_USER_VALUE, C_PROT_VALUE, C_CACHE_VALUE).
    C_TARGET_ADDR             = 32'h00000000,
    C_USER_VALUE              = 1'b0,
    C_PROT_VALUE              = 3'b000,
    C_CACHE_VALUE             = 4'b0011,
    // Non-zero selects the "conservative" write-burst throttling branch in the
    // store unit; also forwarded to bus_write.
    CONSERVATIVE              = 0,
    // MAX_READ_BURST_LENGTH and NUM_READ_OUTSTANDING are not referenced in this
    // module (there is no read path).
    MAX_READ_BURST_LENGTH     = 16,
    MAX_WRITE_BURST_LENGTH    = 16,
    NUM_READ_OUTSTANDING      = 16, // global read outstanding value
    NUM_WRITE_OUTSTANDING     = 16, // global write outstanding value
    USER_MAXREQS              = 16,
    USER_LEN_WIDTH            = 32,
    // channel configurations
    CH0_USER_DW               = 32,
    CH0_USER_AW               = 32,
    // CH0_NUM_READ_OUTSTANDING is not referenced in this module.
    CH0_NUM_READ_OUTSTANDING  = 2,
    CH0_NUM_WRITE_OUTSTANDING = 2,
    CH0_USER_RFIFONUM_WIDTH   = 6,
    // Passed to the store unit as BUFFER_IMPL.
    MAXI_BUFFER_IMPL          = "block"
)(
    // system signal
    input  wire                               ACLK,
    input  wire                               ARESET,
    input  wire                               ACLK_EN,
    // write address channel
    output wire [C_M_AXI_ID_WIDTH-1:0]        AWID,
    output wire [C_M_AXI_ADDR_WIDTH-1:0]      AWADDR,
    output wire [7:0]                         AWLEN,
    output wire [2:0]                         AWSIZE,
    output wire [1:0]                         AWBURST,
    output wire [1:0]                         AWLOCK,
    output wire [3:0]                         AWCACHE,
    output wire [2:0]                         AWPROT,
    output wire [3:0]                         AWQOS,
    output wire [3:0]                         AWREGION,
    output wire [C_M_AXI_AWUSER_WIDTH-1:0]    AWUSER,
    output wire                               AWVALID,
    input  wire                               AWREADY,
    // write data channel
    output wire [C_M_AXI_ID_WIDTH-1:0]        WID,
    output wire [C_M_AXI_DATA_WIDTH-1:0]      WDATA,
    output wire [C_M_AXI_DATA_WIDTH/8-1:0]    WSTRB,
    output wire                               WLAST,
    output wire [C_M_AXI_WUSER_WIDTH-1:0]     WUSER,
    output wire                               WVALID,
    input  wire                               WREADY,
    // write response channel
    input  wire [C_M_AXI_ID_WIDTH-1:0]        BID,
    input  wire [1:0]                         BRESP,
    input  wire [C_M_AXI_BUSER_WIDTH-1:0]     BUSER,
    input  wire                               BVALID,
    output wire                               BREADY,
    // read address channel (outputs are tied to constant zero below)
    output wire [C_M_AXI_ID_WIDTH-1:0]        ARID,
    output wire [C_M_AXI_ADDR_WIDTH-1:0]      ARADDR,
    output wire [7:0]                         ARLEN,
    output wire [2:0]                         ARSIZE,
    output wire [1:0]                         ARBURST,
    output wire [1:0]                         ARLOCK,
    output wire [3:0]                         ARCACHE,
    output wire [2:0]                         ARPROT,
    output wire [3:0]                         ARQOS,
    output wire [3:0]                         ARREGION,
    output wire [C_M_AXI_ARUSER_WIDTH-1:0]    ARUSER,
    output wire                               ARVALID,
    input  wire                               ARREADY,
    // read data channel (RREADY is tied low below; inputs are unused)
    input  wire [C_M_AXI_ID_WIDTH-1:0]        RID,
    input  wire [C_M_AXI_DATA_WIDTH-1:0]      RDATA,
    input  wire [1:0]                         RRESP,
    input  wire                               RLAST,
    input  wire [C_M_AXI_RUSER_WIDTH-1:0]     RUSER,
    input  wire                               RVALID,
    output wire                               RREADY,
    // multiple internal channels
    // channel 0 --  WRITE-ONLY
    // The AW/W/B ports go to store_unit_0; the AR/R ports exist only to keep
    // the port list complete and are tied off or ignored.
    input  wire [CH0_USER_AW-1:0]             I_CH0_AWADDR,
    input  wire [USER_LEN_WIDTH-1:0]          I_CH0_AWLEN,
    input  wire                               I_CH0_AWVALID,
    output wire                               I_CH0_AWREADY,
    input  wire [CH0_USER_DW-1:0]             I_CH0_WDATA,
    input  wire [CH0_USER_DW/8-1:0]           I_CH0_WSTRB,
    input  wire                               I_CH0_WVALID,
    output wire                               I_CH0_WREADY,
    output wire                               I_CH0_BVALID,
    input  wire                               I_CH0_BREADY,
    input  wire [CH0_USER_AW-1:0]             I_CH0_ARADDR,
    input  wire [USER_LEN_WIDTH-1:0]          I_CH0_ARLEN,
    input  wire                               I_CH0_ARVALID,
    output wire                               I_CH0_ARREADY,
    output wire [CH0_USER_DW-1:0]             I_CH0_RDATA,
    output wire                               I_CH0_RVALID,
    input  wire                               I_CH0_RREADY,
    output wire [CH0_USER_RFIFONUM_WIDTH-1:0] I_CH0_RFIFONUM
    );
//------------------------Parameter----------------------
    // Port counts for this configuration. NUM_WRITE_PORTS sizes the per-channel
    // arrays and vectors below; NUM_READ_PORTS is declared but not referenced.
    localparam
        NUM_READ_PORTS  = 0,
        NUM_WRITE_PORTS = 1;

//------------------------Local signal-------------------
    // AW/W/B channel signals
    // local_CHN_* : per-channel signals, one entry/bit per write port, driven by
    //               (or returned to) the store unit of that channel.
    wire [C_M_AXI_ADDR_WIDTH-1:0]   local_CHN_AWADDR  [0 : NUM_WRITE_PORTS-1];
    wire [USER_LEN_WIDTH-1:0]       local_CHN_AWLEN   [0 : NUM_WRITE_PORTS-1];
    wire [NUM_WRITE_PORTS-1:0]      local_CHN_AWVALID;
    wire [NUM_WRITE_PORTS-1:0]      local_CHN_AWREADY;
    wire [C_M_AXI_DATA_WIDTH-1:0]   local_CHN_WDATA   [0 : NUM_WRITE_PORTS-1];
    wire [C_M_AXI_DATA_WIDTH/8-1:0] local_CHN_WSTRB   [0 : NUM_WRITE_PORTS-1];
    wire [NUM_WRITE_PORTS-1:0]      local_CHN_WVALID;
    wire [NUM_WRITE_PORTS-1:0]      local_CHN_WREADY;

    // local_AXI_* : the single request/data/response interface of bus_write.
    wire [C_M_AXI_ID_WIDTH-1:0]     local_AXI_AWID;
    wire [C_M_AXI_ADDR_WIDTH-1:0]   local_AXI_AWADDR;
    wire [USER_LEN_WIDTH-1:0]       local_AXI_AWLEN;
    wire                            local_AXI_AWVALID;
    wire [NUM_WRITE_PORTS-1:0]      local_AXI_AWREADY;
    wire [C_M_AXI_ID_WIDTH-1:0]     local_AXI_WID;
    wire [C_M_AXI_DATA_WIDTH-1:0]   local_AXI_WDATA;
    wire [C_M_AXI_DATA_WIDTH/8-1:0] local_AXI_WSTRB;
    wire                            local_AXI_WVALID;
    wire                            local_AXI_WREADY;
    wire [NUM_WRITE_PORTS-1:0]      local_AXI_BVALID;
    wire [NUM_WRITE_PORTS-1:0]      local_AXI_BREADY;
    
    // local_BURST_* / local_CHN_BURST_* : burst-length side channel exchanged
    // between bus_write and the store unit (the store unit only drives it with
    // real values when CONSERVATIVE != 0).
    wire [7:0]                      local_BURST_AWLEN;
    wire [NUM_WRITE_PORTS-1:0]      local_BURST_AWVALID;
    wire [C_M_AXI_ID_WIDTH-1:0]     local_BURST_WID;
    wire [7:0]                      local_BURST_WLEN;
    wire                            local_BURST_WVALID;
    wire [NUM_WRITE_PORTS-1:0]      local_BURST_WREADY;
    wire [7:0]                      local_CHN_BURST_WLEN [0 : NUM_WRITE_PORTS-1];
    wire [NUM_WRITE_PORTS-1:0]      local_CHN_BURST_WVALID;
    wire [NUM_WRITE_PORTS-1:0]      local_CHN_BURST_WREADY;

    // AR/R channel signals
    // (none declared: this configuration has no read ports)

    // flush logic
    // (nothing is emitted here in this configuration)

    // AXI Ports Initialization
    // No read path exists, so every read-address output is held at zero, no
    // read request is ever issued (ARVALID = 0) and read data is never
    // accepted (RREADY = 0).
    assign ARID     = {C_M_AXI_ID_WIDTH{1'b0}};
    assign ARADDR   = {C_M_AXI_ADDR_WIDTH{1'b0}};
    assign ARLEN    = 8'd0;
    assign ARSIZE   = 3'd0;
    assign ARBURST  = 2'd0;
    assign ARLOCK   = 2'd0;
    assign ARCACHE  = 4'd0;
    assign ARPROT   = 3'd0;
    assign ARQOS    = 4'd0;
    assign ARREGION = 4'd0;
    assign ARUSER   = {C_M_AXI_ARUSER_WIDTH{1'b0}};
    assign ARVALID  = 1'b0;
    assign RREADY   = 1'b0;
    
    // Kernel Ports Initialization
    // Kernel-side read interface of channel 0: never ready, never valid,
    // data and FIFO count held at zero.
    assign I_CH0_ARREADY    = 1'b0;
    assign I_CH0_RDATA      = {CH0_USER_DW{1'b0}};
    assign I_CH0_RVALID     = 1'b0;
    assign I_CH0_RFIFONUM   = {CH0_USER_RFIFONUM_WIDTH{1'b0}};
    
    // Internal Ports Mapping
    // With a single write port the request ID and burst ID are the constant 0
    // and channel 0's valid/ready signals connect straight through to
    // bus_write.
    assign local_AXI_AWID      = {C_M_AXI_ID_WIDTH{1'b0}};
    assign local_AXI_AWVALID   = local_CHN_AWVALID[0];
    assign local_AXI_WVALID    = local_CHN_WVALID[0];
    assign local_CHN_AWREADY   = local_AXI_AWREADY;
    assign local_CHN_WREADY[0] = local_AXI_WREADY;
    assign local_BURST_WID     = {C_M_AXI_ID_WIDTH{1'b0}};
    assign local_BURST_WVALID  = local_CHN_BURST_WVALID[0];
    assign local_CHN_BURST_WREADY = local_BURST_WREADY;
    // Channel selection by ID. local_AXI_AWID and local_BURST_WID are the
    // constants assigned above; local_AXI_WID is an output of bus_write
    // (out_AXI_WID) and selects which channel's data/strobe is forwarded.
    assign local_AXI_AWADDR    = local_CHN_AWADDR[local_AXI_AWID];
    assign local_AXI_AWLEN     = local_CHN_AWLEN[local_AXI_AWID];
    assign local_AXI_WDATA     = local_CHN_WDATA[local_AXI_WID];
    assign local_AXI_WSTRB     = local_CHN_WSTRB[local_AXI_WID];
    assign local_BURST_WLEN    = local_CHN_BURST_WLEN[local_BURST_WID];

    // flush logic
    // (nothing is emitted here in this configuration)
//------------------------Instantiation------------------
    // ================== STORE UNITS ================== 
    // store_unit for channel 0
    // Takes the kernel-side write request/data/response handshake (in_HLS_* /
    // out_HLS_*) and presents it on the channel-0 entries of the local_CHN_*
    // signals. Note it is sized with the per-channel CH0_NUM_WRITE_OUTSTANDING,
    // not the global NUM_WRITE_OUTSTANDING.
    s2mm_gmem_m_axi_store #(
        .CONSERVATIVE           ( CONSERVATIVE),
        .C_TARGET_ADDR          ( C_TARGET_ADDR ),
        .NUM_WRITE_OUTSTANDING  ( CH0_NUM_WRITE_OUTSTANDING),
        .MAX_WRITE_BURST_LENGTH ( MAX_WRITE_BURST_LENGTH ),
        .BUS_ADDR_WIDTH         ( C_M_AXI_ADDR_WIDTH ),
        .BUS_DATA_WIDTH         ( C_M_AXI_DATA_WIDTH ),
        .USER_DW                ( CH0_USER_DW ),
        .USER_AW                ( CH0_USER_AW ),
        .USER_LEN_WIDTH         ( USER_LEN_WIDTH ),
        .USER_MAXREQS           ( USER_MAXREQS ),
        .BUFFER_IMPL            ( MAXI_BUFFER_IMPL )
    ) store_unit_0 (
        .ACLK                   ( ACLK ),
        .ARESET                 ( ARESET ),
        .ACLK_EN                ( ACLK_EN ),
        .out_AXI_AWADDR         ( local_CHN_AWADDR[0] ),
        .out_AXI_AWLEN          ( local_CHN_AWLEN[0] ),
        .out_AXI_AWVALID        ( local_CHN_AWVALID[0] ),
        .in_AXI_AWREADY         ( local_CHN_AWREADY[0] ),
        .in_BURST_AWLEN         ( local_BURST_AWLEN ),
        .in_BURST_AWVALID       ( local_BURST_AWVALID[0] ),
        .out_BURST_WLEN         ( local_CHN_BURST_WLEN[0] ),
        .out_BURST_WVALID       ( local_CHN_BURST_WVALID[0] ),
        .in_BURST_WREADY        ( local_CHN_BURST_WREADY[0] ),
        .out_AXI_WDATA          ( local_CHN_WDATA[0] ),
        .out_AXI_WSTRB          ( local_CHN_WSTRB[0] ),
        .out_AXI_WVALID         ( local_CHN_WVALID[0] ),
        .in_AXI_WREADY          ( local_CHN_WREADY[0] ),
        .in_AXI_BVALID          ( local_AXI_BVALID[0] ),
        .out_AXI_BREADY         ( local_AXI_BREADY[0] ),
        .in_HLS_AWADDR          ( I_CH0_AWADDR  ),
        .in_HLS_AWLEN           ( I_CH0_AWLEN   ),
        .in_HLS_AWVALID         ( I_CH0_AWVALID ),
        .out_HLS_AWREADY        ( I_CH0_AWREADY ),
        .in_HLS_WDATA           ( I_CH0_WDATA   ),
        .in_HLS_WSTRB           ( I_CH0_WSTRB   ),
        .in_HLS_WVALID          ( I_CH0_WVALID  ),
        .out_HLS_WREADY         ( I_CH0_WREADY  ),
        .out_HLS_BVALID         ( I_CH0_BVALID  ),
        .in_HLS_BREADY          ( I_CH0_BREADY  ));

    // ================== LOAD UNITS ================== 
    // (none: NUM_READ_PORTS is 0)

    
    // ================== AXI BUS READ/WRITE ==================
    // s2mm_gmem_m_axi_write 
    // Drives every external AW/W/B pin of this module from the local_AXI_* and
    // local_BURST_* signals. Its implementation is not part of this module;
    // NOTE(review): port directions are inferred from the in_/out_ prefixes.
    s2mm_gmem_m_axi_write #(
        .CONSERVATIVE           ( CONSERVATIVE),
        .C_M_AXI_ID_WIDTH       ( C_M_AXI_ID_WIDTH ),
        .C_M_AXI_AWUSER_WIDTH   ( C_M_AXI_AWUSER_WIDTH ),
        .C_M_AXI_WUSER_WIDTH    ( C_M_AXI_WUSER_WIDTH ),
        .C_M_AXI_BUSER_WIDTH    ( C_M_AXI_BUSER_WIDTH ),
        .C_USER_VALUE           ( C_USER_VALUE ),
        .C_PROT_VALUE           ( C_PROT_VALUE ),
        .C_CACHE_VALUE          ( C_CACHE_VALUE ),
        .BUS_ADDR_WIDTH         ( C_M_AXI_ADDR_WIDTH ),
        .BUS_DATA_WIDTH         ( C_M_AXI_DATA_WIDTH ),
        .USER_LEN_WIDTH         ( USER_LEN_WIDTH ),
        .MAX_WRITE_BURST_LENGTH ( MAX_WRITE_BURST_LENGTH ),
        .NUM_WRITE_OUTSTANDING  ( NUM_WRITE_OUTSTANDING ),
        // outstanding control for channels
        .ID0_NUM_WRITE_OUTSTANDING  ( CH0_NUM_WRITE_OUTSTANDING ), 
        .NUM_WRITE_PORTS        ( NUM_WRITE_PORTS )
    ) bus_write (
        .ACLK                   ( ACLK ),
        .ARESET                 ( ARESET ),
        .ACLK_EN                ( ACLK_EN ),
        .out_BUS_AWID           ( AWID ),
        .out_BUS_AWSIZE         ( AWSIZE ),
        .out_BUS_AWBURST        ( AWBURST ),
        .out_BUS_AWLOCK         ( AWLOCK ),
        .out_BUS_AWCACHE        ( AWCACHE ),
        .out_BUS_AWPROT         ( AWPROT ),
        .out_BUS_AWQOS          ( AWQOS ),
        .out_BUS_AWREGION       ( AWREGION ),
        .out_BUS_AWUSER         ( AWUSER ),
        .out_BUS_AWADDR         ( AWADDR ),
        .out_BUS_AWLEN          ( AWLEN ),
        .out_BUS_AWVALID        ( AWVALID ),
        .in_BUS_AWREADY         ( AWREADY ),
        .out_BUS_WID            ( WID ),
        .out_BUS_WUSER          ( WUSER ),
        .out_BUS_WDATA          ( WDATA ),
        .out_BUS_WSTRB          ( WSTRB ),
        .out_BUS_WLAST          ( WLAST ),
        .out_BUS_WVALID         ( WVALID ),
        .in_BUS_WREADY          ( WREADY ),
        .in_BUS_BID             ( BID ),
        .in_BUS_BRESP           ( BRESP ),
        .in_BUS_BUSER           ( BUSER ),
        .in_BUS_BVALID          ( BVALID ),
        .out_BUS_BREADY         ( BREADY ),
        .in_AXI_AWID            ( local_AXI_AWID ),
        .in_AXI_AWVALID         ( local_AXI_AWVALID ),
        .out_AXI_AWREADY        ( local_AXI_AWREADY ),
        .in_AXI_AWADDR          ( local_AXI_AWADDR ),
        .in_AXI_AWLEN           ( local_AXI_AWLEN ),
        .out_AXI_WID            ( local_AXI_WID ),
        .in_AXI_WVALID          ( local_AXI_WVALID ),
        .out_AXI_WREADY         ( local_AXI_WREADY ),
        .in_AXI_WSTRB           ( local_AXI_WSTRB ),
        .in_AXI_WDATA           ( local_AXI_WDATA ),
        .out_AXI_BVALID         ( local_AXI_BVALID ),
        .in_AXI_BREADY          ( local_AXI_BREADY ),
        .out_BURST_AWLEN        ( local_BURST_AWLEN ),
        .out_BURST_AWVALID      ( local_BURST_AWVALID ),
        .in_BURST_WID           ( local_BURST_WID ),
        .in_BURST_WLEN          ( local_BURST_WLEN ),
        .in_BURST_WVALID        ( local_BURST_WVALID ),
        .out_BURST_WREADY       ( local_BURST_WREADY )
    );
    
    // s2mm_gmem_m_axi_read 
    // (not instantiated: NUM_READ_PORTS is 0)


endmodule
`default_nettype wire
// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689
`timescale 1ns/1ps


module s2mm_gmem_m_axi_store
#(parameter
    CONSERVATIVE                       = 0,
    C_TARGET_ADDR                      = 32'h00000000,
    NUM_WRITE_OUTSTANDING              = 2,
    MAX_WRITE_BURST_LENGTH             = 16,
    BUS_ADDR_WIDTH                     = 32,
    BUS_DATA_WIDTH                     = 32,
    USER_DW                            = 16,
    USER_AW                            = 32,
    USER_LEN_WIDTH                     = 32,
    USER_MAXREQS                       = 16,
    BUFFER_IMPL                        = "auto"
)(
    // system signal
    input  wire                        ACLK,
    input  wire                        ARESET,
    input  wire                        ACLK_EN,
    // write address channel
    output wire [BUS_ADDR_WIDTH-1:0]   out_AXI_AWADDR,
    output wire [USER_LEN_WIDTH-1:0]   out_AXI_AWLEN,
    output wire                        out_AXI_AWVALID,
    input  wire                        in_AXI_AWREADY,
    // write burst throttle
    input  wire [7:0]                  in_BURST_AWLEN,
    input  wire                        in_BURST_AWVALID,
    output wire [7:0]                  out_BURST_WLEN,
    output wire                        out_BURST_WVALID,
    input  wire                        in_BURST_WREADY,
    // write data channel
    output wire [BUS_DATA_WIDTH-1:0]   out_AXI_WDATA,
    output wire [BUS_DATA_WIDTH/8-1:0] out_AXI_WSTRB,
    output wire                        out_AXI_WVALID,
    input  wire                        in_AXI_WREADY,
    // write response channel
    input  wire                        in_AXI_BVALID,
    output wire                        out_AXI_BREADY,
    // internal bus ports
    // write address
    input  wire [USER_AW-1:0]          in_HLS_AWADDR,
    input  wire [USER_LEN_WIDTH-1:0]   in_HLS_AWLEN,
    input  wire                        in_HLS_AWVALID,
    output wire                        out_HLS_AWREADY,
    // write data
    input  wire [USER_DW-1:0]          in_HLS_WDATA,
    input  wire [USER_DW/8-1:0]        in_HLS_WSTRB,
    input  wire                        in_HLS_WVALID,
    output wire                        out_HLS_WREADY,
    // write response
    output wire                        out_HLS_BVALID,
    input  wire                        in_HLS_BREADY
);

//------------------------Parameter----------------------
    // Derived constants of the store unit.
    //   USER_DATA_WIDTH : USER_DW rounded up by calc_data_width() to 8, 16, 32, ...
    //   USER_DATA_BYTES : that width in bytes.
    //   USER_DATA_ALIGN : alias of USER_DATA_WIDTH, compared against BUS_DATA_WIDTH
    //                     to pick the data-path variant.
    //   USER_ADDR_ALIGN : log2 of the user element size in bytes; used as the shift
    //                     amount that turns element addresses/lengths into bytes.
    //   WREQ_PACK_WIDTH : width of one {length, address} request word.
    //   BUS_DATA_BYTES  : bus width in bytes; BUS_ADDR_ALIGN is its log2.
    //   WBUFF_DEPTH     : write-data buffer depth - two maximum-length bursts when
    //                     user and bus widths match, otherwise one.
    //   PREFERRED_IMPL  : buffers of at most 32 entries are forced to "shiftreg";
    //                     larger ones use the requested BUFFER_IMPL.
    //   BURST_LEN_WIDTH : counter width able to hold WBUFF_DEPTH, never below 8.
    //   TARGET_ADDR     : C_TARGET_ADDR with its low USER_ADDR_ALIGN bits cleared.
    //                     The mask is built BUS_ADDR_WIDTH bits wide; a fixed
    //                     32-bit mask would be zero-extended and would wipe every
    //                     base-address bit above bit 31 on buses wider than 32 bits.
    localparam
        USER_DATA_WIDTH = calc_data_width(USER_DW),
        USER_DATA_BYTES = USER_DATA_WIDTH / 8,
        USER_DATA_ALIGN = USER_DATA_WIDTH,
        USER_ADDR_ALIGN = log2(USER_DATA_BYTES),
        WREQ_PACK_WIDTH = USER_AW + USER_LEN_WIDTH,
        BUS_DATA_BYTES  = BUS_DATA_WIDTH / 8,
        BUS_ADDR_ALIGN  = log2(BUS_DATA_BYTES),
        // wdata buffer size
        WBUFF_DEPTH     = (USER_DATA_ALIGN == BUS_DATA_WIDTH) ? 2 * MAX_WRITE_BURST_LENGTH : MAX_WRITE_BURST_LENGTH,
        PREFERRED_IMPL  = (WBUFF_DEPTH <= 32) ? "shiftreg" : BUFFER_IMPL,
        BURST_LEN_WIDTH = max(log2(WBUFF_DEPTH + 1), 8),
        TARGET_ADDR     = C_TARGET_ADDR & ({BUS_ADDR_WIDTH{1'b1}} << USER_ADDR_ALIGN);

//------------------------Task and function--------------

    // Larger of two integers; returns y when both are equal.
    function integer max(input integer x, input integer y);
    begin
        max = (y < x) ? x : y;
    end
    endfunction

    // Smallest value of the form 8 * 2^k that is not below x (never less than 8).
    function integer calc_data_width(input integer x);
    begin
        // Start at one byte and keep doubling until the request fits.
        calc_data_width = 8;
        while (calc_data_width < x)
            calc_data_width = calc_data_width * 2;
    end
    endfunction

    // Ceiling of log2(x): the number of doublings of 1 needed to reach at least x.
    // Yields 0 for any x <= 1.
    function integer log2(input integer x);
        integer span;
    begin
        log2 = 0;
        for (span = 1; span < x; span = span * 2)
            log2 = log2 + 1;
    end
    endfunction

//------------------------Local signal-------------------
    wire                                next_wreq;
    wire                                ready_for_wreq;
    wire                                wreq_ready;
    wire                                wreq_valid;
    wire                                valid_length;

    wire [USER_AW-1:0]                  wreq_addr;
    wire [USER_LEN_WIDTH-1:0]           wreq_len;
    reg  [BUS_ADDR_WIDTH-1:0]           tmp_addr;
    reg  [USER_LEN_WIDTH-1:0]           tmp_len;
    reg                                 tmp_valid;

    wire                                local_AXI_WVALID;
    wire                                local_AXI_WREADY;
    wire [BUS_DATA_WIDTH-1:0]           local_AXI_WDATA;
    wire [BUS_DATA_BYTES-1:0]           local_AXI_WSTRB;

    wire                                wrsp_ready;
    wire                                wrsp_valid;
    wire                                wrsp_read;
    wire                                wrsp_type;
    wire                                ursp_ready;
    wire                                ursp_write;

    wire [WREQ_PACK_WIDTH-1:0]          in_wreq_pack;
    wire [WREQ_PACK_WIDTH-1:0]          out_wreq_pack;
    // regslice io ?  no 
    // enable regslice on W channel  no 
//------------------------Instantiation------------------
    // AW channel regslice 

    // AW channel fifo
    s2mm_gmem_m_axi_fifo #(
        .DATA_WIDTH        (WREQ_PACK_WIDTH),
        .ADDR_WIDTH        (log2(USER_MAXREQS)),
        .DEPTH             (USER_MAXREQS)
    ) fifo_wreq (
        .clk               (ACLK),
        .reset             (ARESET),
        .clk_en            (ACLK_EN),
        .if_full_n         (out_HLS_AWREADY),
        .if_write          (in_HLS_AWVALID),
        .if_din            (in_wreq_pack),
        .if_empty_n        (wreq_valid),
        .if_read           (next_wreq),
        .if_dout           (out_wreq_pack),
        .if_num_data_valid ());
    // ===================================================================
    // start of AWADDR PREPROCESSOR
    assign in_wreq_pack    = {in_HLS_AWLEN, in_HLS_AWADDR};
    assign {wreq_len, wreq_addr} = out_wreq_pack;

    assign next_wreq       = wreq_valid && ready_for_wreq && wrsp_ready;
    assign ready_for_wreq  = ~tmp_valid || (in_AXI_AWREADY && wreq_ready);
    assign valid_length    = (wreq_len != 0) && !wreq_len[USER_LEN_WIDTH-1];

    assign out_AXI_AWLEN   = tmp_len;   // Byte length
    assign out_AXI_AWADDR  = tmp_addr;  // Byte address
    assign out_AXI_AWVALID = tmp_valid && wreq_ready;

    always @(posedge ACLK)
    begin
        if (ARESET) begin
            tmp_len  <= 0;
            tmp_addr <= 0;
        end
        else if (ACLK_EN) begin
            if(next_wreq) begin
                tmp_len  <= (wreq_len << USER_ADDR_ALIGN) - 1;            // byte length
                tmp_addr <= TARGET_ADDR + (wreq_addr << USER_ADDR_ALIGN); // byte address
            end
        end
    end
 
    always @(posedge ACLK) 
    begin
        if (ARESET)
            tmp_valid <= 1'b0;
        else if (ACLK_EN) begin
            if (next_wreq && valid_length)
                tmp_valid <= 1'b1;
            else if (in_AXI_AWREADY && wreq_ready)
                tmp_valid <= 1'b0;
        end
    end
    // end of AWADDR PREPROCESSOR
    // ===================================================================

    // =================================================================== 
    // start of WRITE BURST throttling control
    generate if (CONSERVATIVE == 0) begin : aggressive_gen

        assign out_BURST_WLEN   = 8'd0;
        assign out_BURST_WVALID = 1'b0;

    end
    // conservative mode
    else begin : conservative_gen
        reg  [7:0]                 local_BURST_WLEN;
        reg                        local_BURST_WVALID;
        wire                       next_burst;
        wire [7:0]                 burst_len;
        wire                       burst_valid;
        wire                       burst_ready;
        wire                       beat_write;
        wire [BURST_LEN_WIDTH:0]   num_beat_pred_br11;
        wire [BURST_LEN_WIDTH:0]   num_beat_pred_br10;
        wire [BURST_LEN_WIDTH:0]   num_beat_pred_br01;
        wire [BURST_LEN_WIDTH:0]   num_beat_pred_br00;
        reg  [BURST_LEN_WIDTH-1:0] num_beat_pred;
        reg  [BURST_LEN_WIDTH-1:0] num_beat_cnt;

        s2mm_gmem_m_axi_fifo #(
            .DATA_WIDTH        (8),
            .ADDR_WIDTH        (log2(NUM_WRITE_OUTSTANDING)),
            .DEPTH             (NUM_WRITE_OUTSTANDING)
        ) fifo_burst (
            .clk               (ACLK),
            .reset             (ARESET),
            .clk_en            (ACLK_EN),
            .if_full_n         (),
            .if_write          (in_BURST_AWVALID),
            .if_din            (in_BURST_AWLEN),
            .if_empty_n        (burst_valid),
            .if_read           (next_burst),
            .if_dout           (burst_len),
            .if_num_data_valid ());

        assign out_BURST_WLEN   = local_BURST_WLEN;
        assign out_BURST_WVALID = local_BURST_WVALID;
        assign burst_ready      = ~local_BURST_WVALID || in_BURST_WREADY;
        assign next_burst       = burst_valid && burst_ready && ~num_beat_pred_br10[BURST_LEN_WIDTH];
        assign beat_write       = local_AXI_WREADY && local_AXI_WVALID;

        always @(posedge ACLK)
        begin
            if (ARESET)
                local_BURST_WVALID <= 1'b0;
            else if (ACLK_EN) begin
                if (next_burst)
                    local_BURST_WVALID <= 1'b1; 
                else if (in_BURST_WREADY)
                    local_BURST_WVALID <= 1'b0;
            end
        end

        always @(posedge ACLK)
        begin
            if (ARESET)
                local_BURST_WLEN <= 8'd0;
            else if (ACLK_EN) begin
                if (next_burst)
                    local_BURST_WLEN <= burst_len;
            end
        end

        assign num_beat_pred_br11 = {1'b0,num_beat_cnt} - burst_len;
        assign num_beat_pred_br10 = {1'b0,num_beat_cnt} - burst_len - 1'b1;
        assign num_beat_pred_br01 = {1'b0,num_beat_cnt} + 1'b1;
        assign num_beat_pred_br00 = {1'b0,num_beat_cnt};

        always @(*) begin
            case ({next_burst, beat_write})
                2'b11 : num_beat_pred   = num_beat_pred_br11[BURST_LEN_WIDTH-1:0];
                2'b10 : num_beat_pred   = num_beat_pred_br10[BURST_LEN_WIDTH-1:0];
                2'b01 : num_beat_pred   = num_beat_pred_br01[BURST_LEN_WIDTH-1:0];
                default : num_beat_pred = num_beat_pred_br00[BURST_LEN_WIDTH-1:0];
            endcase
        end

        always @(posedge ACLK) 
        begin
            if (ARESET)
                num_beat_cnt <= 0;
            else if (ACLK_EN) begin
                num_beat_cnt <= num_beat_pred;
            end
        end

    end
    endgenerate

    // end of WRITE BURST throttling control
    // ===================================================================
    // W channel regslice 

    // W channel fifo
    s2mm_gmem_m_axi_fifo #(
        .MEM_STYLE         (PREFERRED_IMPL),
        .DATA_WIDTH        (BUS_DATA_WIDTH + BUS_DATA_BYTES),
        .ADDR_WIDTH        (log2(WBUFF_DEPTH)),
        .DEPTH             (WBUFF_DEPTH)
    ) buff_wdata (
        .clk               (ACLK),
        .reset             (ARESET),
        .clk_en            (ACLK_EN),
        .if_full_n         (local_AXI_WREADY),
        .if_write          (local_AXI_WVALID),
        .if_din            ({local_AXI_WSTRB, local_AXI_WDATA}),
        .if_empty_n        (out_AXI_WVALID),
        .if_read           (in_AXI_WREADY),
        .if_dout           ({out_AXI_WSTRB, out_AXI_WDATA}),
        .if_num_data_valid ());
    // ===================================================================
    // start of WDATA PREPROCESSOR
    generate
    if (USER_DATA_ALIGN == BUS_DATA_WIDTH) begin : bus_equal_gen
        assign local_AXI_WDATA  = in_HLS_WDATA;
        assign local_AXI_WSTRB  = in_HLS_WSTRB;
        assign local_AXI_WVALID = in_HLS_WVALID;
        assign out_HLS_WREADY   = local_AXI_WREADY;
        assign wreq_ready       = 1'b1;
    end
    else if (USER_DATA_ALIGN < BUS_DATA_WIDTH) begin : bus_wide_gen
        localparam
            TOTAL_PADS        = BUS_DATA_WIDTH / USER_DATA_ALIGN,
            PAD_ALIGN         = log2(TOTAL_PADS),
            BEAT_LEN_WIDTH    = USER_LEN_WIDTH - BUS_ADDR_ALIGN,
            OFFSET_PACK_WIDTH = 2*PAD_ALIGN+BEAT_LEN_WIDTH,
            WBUFF_IN_DEPTH    = MAX_WRITE_BURST_LENGTH * TOTAL_PADS;

        // Builds a mask with the low `din` bits set and all higher bits clear
        // (din = 0 gives all zeros).
        function [TOTAL_PADS-1:0] decoder(input [PAD_ALIGN-1:0] din);
            integer idx;
        begin
            decoder = {TOTAL_PADS{1'b0}};
            // Walk every bit position and set only those below din.
            for (idx = 0; idx < TOTAL_PADS; idx = idx + 1)
                if (idx < din)
                    decoder[idx] = 1'b1;
        end
        endfunction

        wire [BUS_ADDR_ALIGN-1:0]    tmp_addr_end;
        wire                         offset_full_n;
        wire                         offset_write;
        wire                         offset_empty_n;
        wire                         offset_read;
        reg                          offset_valid;
        wire                         next_offset;

        wire [OFFSET_PACK_WIDTH-1:0] offset_pack_in;
        wire [OFFSET_PACK_WIDTH-1:0] offset_pack_out;
        reg  [OFFSET_PACK_WIDTH-1:0] offset_pack_buf;
        wire [PAD_ALIGN-1:0]         start_offset;
        wire [PAD_ALIGN-1:0]         end_offset;
        wire [PAD_ALIGN-1:0]         head_offset;
        wire [PAD_ALIGN-1:0]         tail_offset;
        wire [BEAT_LEN_WIDTH-1:0]    align_len;
        wire [BEAT_LEN_WIDTH-1:0]    total_len;
        wire [BEAT_LEN_WIDTH-1:0]    beat_len;
        reg  [BEAT_LEN_WIDTH-1:0]    beat_len_cnt;

        wire [TOTAL_PADS-1:0]        add_head;
        wire [TOTAL_PADS-1:0]        add_tail;
        wire [TOTAL_PADS-1:0]        pad_oh;
        reg  [TOTAL_PADS-1:0]        pad_oh_reg;

        wire [TOTAL_PADS-1:0]        head_pad_sel;
        wire [0 : TOTAL_PADS-1]      tail_pad_sel; // reverse
        wire                         ready_for_data;
        wire                         next_pad;
        reg                          first_pad;
        wire                         last_pad;
        reg                          first_beat_set;
        reg                          last_beat_set;
        reg                          single_beat;
        wire                         first_beat;
        wire                         last_beat;
        wire                         next_beat;

        reg  [BUS_DATA_WIDTH-1:0]    data_buf;
        reg  [BUS_DATA_BYTES-1:0]    strb_buf;
        reg                          data_valid;

        wire [USER_DW+USER_DW/8-1:0] in_wdata_pack;
        wire                         in_wdata_vld;
        wire                         out_wdata_rdy;
        wire                         local_HLS_WVALID;
        wire                         local_HLS_WREADY;
        wire [USER_DW-1:0]           local_HLS_WDATA;
        wire [(USER_DW/8)-1:0]       local_HLS_WSTRB;

        // Recording the offset of start & end address to align beats from data USER_DATA_ALIGN < BUS_DW.
        s2mm_gmem_m_axi_fifo #(
            .DATA_WIDTH         (OFFSET_PACK_WIDTH),
            .ADDR_WIDTH         (log2(NUM_WRITE_OUTSTANDING)),
            .DEPTH              (NUM_WRITE_OUTSTANDING)
        ) wreq_offset (
            .clk                (ACLK),
            .reset              (ARESET),
            .clk_en             (ACLK_EN),
            .if_full_n          (offset_full_n),
            .if_write           (offset_write),
            .if_din             (offset_pack_in),
            .if_empty_n         (offset_empty_n),
            .if_read            (offset_read),
            .if_dout            (offset_pack_out),
            .if_num_data_valid  ());

        s2mm_gmem_m_axi_fifo #(
            .DATA_WIDTH        (USER_DW + USER_DW/8),
            .ADDR_WIDTH        (log2(WBUFF_IN_DEPTH)),
            .DEPTH             (WBUFF_IN_DEPTH)
        ) buff_wdata_in (
            .clk               (ACLK),
            .reset             (ARESET),
            .clk_en            (ACLK_EN),
            .if_full_n         (out_wdata_rdy),
            .if_write          (in_wdata_vld),
            .if_din            (in_wdata_pack),
            .if_empty_n        (local_HLS_WVALID),
            .if_read           (local_HLS_WREADY),
            .if_dout           ({local_HLS_WSTRB, local_HLS_WDATA}),
            .if_num_data_valid ());

        // ---- Request side: per-request alignment information ----
        // High unless an entry is being written (offset_write) while offset_full_n is low.
        assign wreq_ready       = offset_full_n | ~offset_write;
        // Sum of the low BUS_ADDR_ALIGN bits of the request address and length.
        assign tmp_addr_end     = tmp_addr[BUS_ADDR_ALIGN-1:0] + tmp_len[BUS_ADDR_ALIGN-1:0];
        // Low address bits expressed in units of 2**USER_ADDR_ALIGN.
        assign start_offset     = tmp_addr[BUS_ADDR_ALIGN-1:0] >> USER_ADDR_ALIGN;
        // Bitwise inverse of the low end-address bits, in the same units.
        assign end_offset       = ~tmp_addr_end[BUS_ADDR_ALIGN-1:0] >> USER_ADDR_ALIGN;
        // (length + low address bits) scaled down by 2**BUS_ADDR_ALIGN.
        assign align_len        = (tmp_len + tmp_addr[BUS_ADDR_ALIGN-1:0]) >> BUS_ADDR_ALIGN;
        assign offset_write     = tmp_valid && in_AXI_AWREADY;
        // Pop the next entry when none is held, or when the held one completes.
        assign offset_read      = ~offset_valid || next_offset;
        // Pack/unpack use the same field order; the unpack reads the registered
        // copy (offset_pack_buf), not the FIFO output directly.
        assign offset_pack_in   = {start_offset, end_offset, align_len};
        assign {head_offset, tail_offset, total_len} = offset_pack_buf;

        // ---- Write-data handshake ----
        assign in_wdata_pack    = {in_HLS_WSTRB, in_HLS_WDATA};
        assign in_wdata_vld     = in_HLS_WVALID;
        assign out_HLS_WREADY   = out_wdata_rdy;
        assign local_AXI_WSTRB  = strb_buf;
        assign local_AXI_WDATA  = data_buf;
        assign local_AXI_WVALID = data_valid;
        // User data is consumed only while an offset entry is held and the
        // output register can accept data.
        assign local_HLS_WREADY = offset_valid && ready_for_data;

        // The held offset entry completes together with its last beat.
        assign next_offset      = last_beat && next_beat;
        // Output register is free, or is being drained this cycle.
        assign ready_for_data   = ~data_valid || local_AXI_WREADY;

        // ---- Beat tracking ----
        // On the first beat the count comes from the request; afterwards from
        // the down-counter.
        assign beat_len         = first_beat ? total_len : beat_len_cnt;
        assign first_beat       = first_beat_set && offset_valid;
        assign last_beat        = (single_beat || last_beat_set) && offset_valid;
        // A beat completes when the last pad position is selected; pad_oh is
        // forced to 0 without valid input data, so this implies local_HLS_WVALID.
        assign next_beat        = local_HLS_WREADY && last_pad;

        // ---- Pad (lane) tracking ----
        // One user word is transferred into the current pad position.
        assign next_pad         = local_HLS_WREADY && local_HLS_WVALID;
        // On the last beat the final position is pulled in by tail_offset;
        // otherwise it is the top position.
        assign last_pad         = (last_beat) ? pad_oh[TOTAL_PADS-tail_offset-1] : pad_oh[TOTAL_PADS-1];
        // decoder() is defined outside this excerpt; its results are used
        // bit-per-pad to derive add_head / add_tail.
        assign head_pad_sel     = decoder(head_offset);
        assign tail_pad_sel     = decoder(tail_offset);

        // Holding register for the current offset entry and its valid flag.
        // A new entry is latched whenever the offset FIFO has data and
        // offset_read is asserted; otherwise next_offset clears the valid flag.
        always @(posedge ACLK) begin
            if (ARESET) begin
                offset_valid    <= 1'b0;
                offset_pack_buf <= 0;
                single_beat     <= 1'b0;
            end
            else if (ACLK_EN) begin
                if (offset_empty_n && offset_read) begin
                    offset_valid    <= 1'b1;
                    offset_pack_buf <= offset_pack_out;
                    // flag entries whose length field is zero
                    single_beat     <= (offset_pack_out[BEAT_LEN_WIDTH-1:0] == 0);
                end
                else if (next_offset) begin
                    offset_valid    <= 1'b0;
                end
            end
        end

        // Beat bookkeeping for the held offset entry:
        //   beat_len_cnt   - remaining-beat counter, reloaded on every completed beat
        //   first_beat_set - set until the first beat of an entry completes
        //   last_beat_set  - set once the beat just completed had beat_len == 1
        always @(posedge ACLK) begin
            if (ARESET) begin
                beat_len_cnt   <= 0;
                first_beat_set <= 1'b1;
                last_beat_set  <= 1'b0;
            end
            else if (ACLK_EN) begin
                if (next_beat) begin
                    beat_len_cnt <= beat_len - 1;
                end

                // finishing the entry takes priority over a plain beat advance
                if (next_offset) begin
                    first_beat_set <= 1'b1;
                    last_beat_set  <= 1'b0;
                end
                else if (next_beat) begin
                    first_beat_set <= 1'b0;
                    last_beat_set  <= (beat_len == 1);
                end
            end
        end

        // Pad select for the incoming user word.
        //   - 0 while no input data is valid
        //   - at the first pad of the first beat: 1 shifted by head_offset
        //   - at the first pad of any other beat: position 0
        //   - otherwise: the registered, shifted value
        assign pad_oh = (~local_HLS_WVALID)       ? 0                :
                        (first_pad && first_beat) ? 1 << head_offset :
                        (first_pad)?                1                :
                        pad_oh_reg;

        // first_pad  : set again after the last pad of a beat, cleared after any other pad.
        // pad_oh_reg : pad_oh shifted up by one position for the next word.
        always @(posedge ACLK) begin
            if (ARESET) begin
                first_pad  <= 1'b1;
                pad_oh_reg <= 0;
            end
            else if (ACLK_EN) begin
                if (next_pad) begin
                    pad_oh_reg <= {pad_oh[TOTAL_PADS - 2:0], 1'b0};

                    if (~last_pad)
                        first_pad <= 1'b0;
                    else if (last_pad)
                        first_pad <= 1'b1;
                end
            end
        end

        // One data/strobe lane per pad position.
        genvar  i;
        for (i = 0; i < TOTAL_PADS; i = i + 1) begin : data_gen
            // Lane is zero-filled when selected as head padding on the first
            // beat or as tail padding on the last beat.
            assign add_head[i] = head_pad_sel[i] && first_beat;
            assign add_tail[i] = tail_pad_sel[i] && last_beat;

            // Lane registers only change while the output stage can accept
            // data; padding takes priority over loading the user word.
            always @(posedge ACLK) begin
                if (ARESET) begin
                    data_buf[i*USER_DATA_ALIGN +: USER_DATA_ALIGN] <= {USER_DATA_ALIGN{1'b0}};
                    strb_buf[i*USER_DATA_BYTES +: USER_DATA_BYTES] <= {USER_DATA_BYTES{1'b0}};
                end
                else if (ACLK_EN) begin
                    if (ready_for_data) begin
                        if (add_head[i] || add_tail[i]) begin
                            data_buf[i*USER_DATA_ALIGN +: USER_DATA_ALIGN] <= {USER_DATA_ALIGN{1'b0}};
                            strb_buf[i*USER_DATA_BYTES +: USER_DATA_BYTES] <= {USER_DATA_BYTES{1'b0}};
                        end
                        else if (pad_oh[i] == 1'b1) begin
                            data_buf[i*USER_DATA_ALIGN +: USER_DATA_ALIGN] <= local_HLS_WDATA;
                            strb_buf[i*USER_DATA_BYTES +: USER_DATA_BYTES] <= local_HLS_WSTRB;
                        end
                    end
                end
            end

        end

        // Output-beat valid flag: raised when a beat completes (next_beat),
        // otherwise dropped as soon as the output stage is free or drained.
        always @(posedge ACLK) begin
            if (ARESET)
                data_valid <= 1'b0;
            else if (ACLK_EN && next_beat)
                data_valid <= 1'b1;
            else if (ACLK_EN && ready_for_data)
                data_valid <= 1'b0;
        end

    end
    else begin : bus_narrow_gen
        localparam
            // number of BUS_DATA_WIDTH pieces per USER_DATA_ALIGN word
            TOTAL_SPLIT       = USER_DATA_ALIGN / BUS_DATA_WIDTH,
            // width of the split counter
            SPLIT_ALIGN       = log2(TOTAL_SPLIT),
            // width of the beat-count values (align_len / beat_len / beat_len_cnt)
            BEAT_LEN_WIDTH    = USER_LEN_WIDTH - BUS_ADDR_ALIGN,
            // depth of buff_wdata_in; max() is defined outside this excerpt
            WBUFF_IN_DEPTH    = max(MAX_WRITE_BURST_LENGTH/TOTAL_SPLIT, 1);

        // wreq_offset FIFO handshake
        wire                       offset_full_n;
        wire                       offset_write;
        wire                       offset_valid;
        wire                       next_offset;

        // beat count: value pushed per request, value at FIFO head, running counter
        wire [BEAT_LEN_WIDTH-1:0]  align_len;
        wire [BEAT_LEN_WIDTH-1:0]  beat_len;
        reg  [BEAT_LEN_WIDTH-1:0]  beat_len_cnt;

        // output stage: shift registers whose low part drives local_AXI_W*
        wire                       ready_for_data;
        reg  [USER_DATA_WIDTH-1:0] data_buf;
        reg  [USER_DATA_BYTES-1:0] strb_buf;
        reg                        data_valid;

        // buff_wdata_in FIFO: packed {strobe, data} in, unpacked on the read side
        wire [USER_DW+USER_DW/8-1:0] in_wdata_pack;
        wire                       in_wdata_vld;
        wire                       out_wdata_rdy;
        wire                       local_HLS_WVALID;
        wire                       local_HLS_WREADY;
        wire [USER_DW-1:0]         local_HLS_WDATA;
        wire [(USER_DW/8)-1:0]     local_HLS_WSTRB;

        // split sequencing: counter, "next word may be loaded" flag, and events
        reg  [SPLIT_ALIGN-1:0]     split_cnt;
        reg                        first_split_pred;
        wire                       first_split;
        wire                       next_split;
        wire                       last_split;

        // Per-request beat-count queue: align_len is pushed whenever offset_write
        // is asserted; the head entry is presented as beat_len (qualified by
        // offset_valid) and is popped by next_offset.
        s2mm_gmem_m_axi_fifo #(
            .DATA_WIDTH        (BEAT_LEN_WIDTH),
            .ADDR_WIDTH        (log2(NUM_WRITE_OUTSTANDING)),
            .DEPTH             (NUM_WRITE_OUTSTANDING)
        ) wreq_offset (
            .clk               (ACLK),
            .reset             (ARESET),
            .clk_en            (ACLK_EN),
            .if_full_n         (offset_full_n),
            .if_write          (offset_write),
            .if_din            (align_len),
            .if_empty_n        (offset_valid),
            .if_read           (next_offset),
            .if_dout           (beat_len),
            .if_num_data_valid ());

        // Input write-data buffer. Each entry is one user word packed as
        // {strobe, data}; the read side feeds the splitting logic.
        s2mm_gmem_m_axi_fifo #(
            .DEPTH             (WBUFF_IN_DEPTH),
            .ADDR_WIDTH        (log2(WBUFF_IN_DEPTH)),
            .DATA_WIDTH        (USER_DW + USER_DW/8)
        ) buff_wdata_in (
            // clock / reset
            .clk               (ACLK),
            .clk_en            (ACLK_EN),
            .reset             (ARESET),
            // push side
            .if_write          (in_wdata_vld),
            .if_din            (in_wdata_pack),
            .if_full_n         (out_wdata_rdy),
            // pop side
            .if_read           (local_HLS_WREADY),
            .if_dout           ({local_HLS_WSTRB, local_HLS_WDATA}),
            .if_empty_n        (local_HLS_WVALID),
            // occupancy output is left unconnected
            .if_num_data_valid ()
        );

        // ---- Request side ----
        // High unless an entry is being written while the wreq_offset FIFO is full.
        assign wreq_ready       = offset_full_n | ~offset_write;
        // (length + low address bits) scaled down by 2**BUS_ADDR_ALIGN.
        assign align_len        = (tmp_len + tmp_addr[BUS_ADDR_ALIGN-1:0]) >> BUS_ADDR_ALIGN;
        assign offset_write     = tmp_valid & in_AXI_AWREADY;

        // ---- Write-data handshake ----
        assign in_wdata_pack    = {in_HLS_WSTRB, in_HLS_WDATA};
        assign in_wdata_vld     = in_HLS_WVALID;
        assign out_HLS_WREADY   = out_wdata_rdy;
        // Only the low bus-width slice of the shift registers is driven out.
        assign local_AXI_WDATA  = data_buf[BUS_DATA_WIDTH-1:0];
        assign local_AXI_WSTRB  = strb_buf[BUS_DATA_BYTES-1:0];
        assign local_AXI_WVALID = data_valid;
        // A new user word is taken only at the start of a split sequence, while
        // a request entry is available and the output stage can accept data.
        assign local_HLS_WREADY = first_split_pred && offset_valid && ready_for_data;

        // The head request entry is done when the beat counter matches its
        // beat_len on the final split of a word.
        assign next_offset      = (beat_len_cnt == beat_len) && offset_valid && last_split;
        // Output register is free, or is being drained this cycle.
        assign ready_for_data   = ~data_valid | local_AXI_WREADY;

        // ---- Split sequencing ----
        // first_split: a user word is popped from buff_wdata_in.
        assign first_split      = local_HLS_WREADY && local_HLS_WVALID ;
        // last_split: the counter is at its final value and the output stage advances.
        assign last_split       = (split_cnt == (TOTAL_SPLIT-1)) && ready_for_data;
        // next_split: advance within a word that is already loaded.
        assign next_split       = ~first_split_pred && ready_for_data;
        
        // Split sequencing and per-request beat counting.
        //   first_split_pred - high when the next transfer starts a new user word
        //   split_cnt        - position inside the current user word
        //   beat_len_cnt     - beats counted for the head request entry
        always @(posedge ACLK) begin
            if (ARESET) begin
                first_split_pred <= 1'b1;
                split_cnt        <= 0;
                beat_len_cnt     <= 0;
            end
            else if (ACLK_EN) begin
                // word-level sequencing: wrap on the last split, else advance
                if (last_split) begin
                    first_split_pred <= 1'b1;
                    split_cnt        <= 0;
                end
                else if (first_split || next_split) begin
                    first_split_pred <= 1'b0;
                    split_cnt        <= split_cnt + 1;
                end

                // request-level counting: restart when the entry completes
                if (next_offset) begin
                    beat_len_cnt <= 0;
                end
                else if (first_split || next_split) begin
                    beat_len_cnt <= beat_len_cnt + 1;
                end
            end
        end
 
        // Output stage registers.
        //   data_buf / strb_buf - loaded with a full user word on first_split,
        //                         then shifted down one bus word per next_split
        //   data_valid          - set on first_split; cleared when the stage is
        //                         free or drained and no further split follows
        always @(posedge ACLK) begin
            if (ARESET) begin
                data_buf   <= {USER_DATA_WIDTH{1'b0}};
                strb_buf   <= {USER_DATA_BYTES{1'b0}};
                data_valid <= 1'b0;
            end
            else if (ACLK_EN) begin
                if (first_split) begin
                    data_buf   <= local_HLS_WDATA;
                    strb_buf   <= local_HLS_WSTRB;
                    data_valid <= 1'b1;
                end
                else begin
                    if (next_split) begin
                        data_buf <= data_buf >> BUS_DATA_WIDTH;
                        strb_buf <= strb_buf >> BUS_DATA_BYTES;
                    end

                    if (~next_split && ready_for_data)
                        data_valid <= 1'b0;
                end
            end
        end
    end
    endgenerate
    // end of WDATA PREPROCESSOR
    // ===================================================================

    // Generate a response for every request, including requests with an
    // invalid length: one type bit is queued per request (next_wreq).
    s2mm_gmem_m_axi_fifo #(
        .DEPTH             (NUM_WRITE_OUTSTANDING),
        .ADDR_WIDTH        (log2(NUM_WRITE_OUTSTANDING)),
        .DATA_WIDTH        (1)
    ) fifo_wrsp (
        // clock / reset
        .clk               (ACLK),
        .clk_en            (ACLK_EN),
        .reset             (ARESET),
        // push side
        .if_write          (next_wreq),
        .if_din            (valid_length),
        .if_full_n         (wrsp_ready),
        // pop side
        .if_read           (wrsp_read),
        .if_dout           (wrsp_type), // 1 - valid length request, 0 - invalid length request
        .if_empty_n        (wrsp_valid),
        // occupancy output is left unconnected
        .if_num_data_valid ()
    );

    // Queue of completed responses towards the user side. Only the occupancy
    // matters: a constant 1 is pushed and the data output is unconnected.
    s2mm_gmem_m_axi_fifo #(
        .DEPTH             (USER_MAXREQS),
        .ADDR_WIDTH        (log2(USER_MAXREQS)),
        .DATA_WIDTH        (1)
    ) user_resp (
        // clock / reset
        .clk               (ACLK),
        .clk_en            (ACLK_EN),
        .reset             (ARESET),
        // push side
        .if_write          (ursp_write),
        .if_din            (1'b1),
        .if_full_n         (ursp_ready),
        // pop side
        .if_read           (in_HLS_BREADY),
        .if_dout           (),
        .if_empty_n        (out_HLS_BVALID),
        // occupancy output is left unconnected
        .if_num_data_valid ()
    );


    // Push a user response when a request record is available and either it
    // is an invalid-length request (wrsp_type == 0, no bus response awaited)
    // or the bus response has arrived (in_AXI_BVALID).
    assign ursp_write  = wrsp_valid && (!wrsp_type || in_AXI_BVALID);
    // Retire the request record only when the user-response FIFO accepts the push.
    assign wrsp_read   = ursp_ready && ursp_write;
    // Accept a bus response only for valid-length requests and only while the
    // user-response FIFO has room.
    assign out_AXI_BREADY = wrsp_type && ursp_ready;

endmodule


// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689

`timescale 1ns/1ps





// AXI write channel wrapper: converts user write requests into bus bursts
// (wreq_burst_conv), paces address/data onto the bus (wreq_throttle), and
// matches bus write responses against per-port outstanding-request records.
module s2mm_gmem_m_axi_write
#(parameter
    CONSERVATIVE              = 0,
    C_M_AXI_ID_WIDTH          = 1,
    C_M_AXI_AWUSER_WIDTH      = 1,
    C_M_AXI_WUSER_WIDTH       = 1,
    C_M_AXI_BUSER_WIDTH       = 1,
    C_USER_VALUE              = 1'b0,
    C_PROT_VALUE              = 3'b000,
    C_CACHE_VALUE             = 4'b0011,
    BUS_ADDR_WIDTH            = 32,
    BUS_DATA_WIDTH            = 32,
    USER_LEN_WIDTH            = 32,
    MAX_WRITE_BURST_LENGTH    = 16,
    NUM_WRITE_OUTSTANDING     = 2,
    ID0_NUM_WRITE_OUTSTANDING = 2,
    NUM_WRITE_PORTS           = 1
)(
    // system signal
    input  wire                             ACLK,
    input  wire                             ARESET,
    input  wire                             ACLK_EN,
    // write address channel
    output wire [C_M_AXI_ID_WIDTH-1:0]      out_BUS_AWID,
    output wire [2:0]                       out_BUS_AWSIZE,
    output wire [1:0]                       out_BUS_AWBURST,
    output wire [1:0]                       out_BUS_AWLOCK,
    output wire [3:0]                       out_BUS_AWCACHE,
    output wire [2:0]                       out_BUS_AWPROT,
    output wire [3:0]                       out_BUS_AWQOS,
    output wire [3:0]                       out_BUS_AWREGION,
    output wire [C_M_AXI_AWUSER_WIDTH-1:0]  out_BUS_AWUSER,
    output wire [BUS_ADDR_WIDTH-1:0]        out_BUS_AWADDR,
    output wire [7:0]                       out_BUS_AWLEN,
    output wire                             out_BUS_AWVALID,
    input  wire                             in_BUS_AWREADY,
    // write data channel
    output wire [C_M_AXI_ID_WIDTH-1:0]      out_BUS_WID,
    output wire [C_M_AXI_WUSER_WIDTH-1:0]   out_BUS_WUSER,
    output wire [BUS_DATA_WIDTH-1:0]        out_BUS_WDATA,
    output wire [BUS_DATA_WIDTH/8-1:0]      out_BUS_WSTRB,
    output wire                             out_BUS_WLAST,
    output wire                             out_BUS_WVALID,
    input  wire                             in_BUS_WREADY,
    // write response channel
    input  wire [C_M_AXI_ID_WIDTH-1:0]      in_BUS_BID,
    input  wire [1:0]                       in_BUS_BRESP,
    input  wire [C_M_AXI_BUSER_WIDTH-1:0]   in_BUS_BUSER,
    input  wire                             in_BUS_BVALID,
    output wire                             out_BUS_BREADY,
    // internal channels
    input  wire [C_M_AXI_ID_WIDTH-1:0]      in_AXI_AWID,
    input  wire [BUS_ADDR_WIDTH-1:0]        in_AXI_AWADDR,
    input  wire [USER_LEN_WIDTH-1:0]        in_AXI_AWLEN,
    input  wire                             in_AXI_AWVALID,
    output wire [NUM_WRITE_PORTS-1:0]       out_AXI_AWREADY,
    output wire [C_M_AXI_ID_WIDTH-1:0]      out_AXI_WID,
    input  wire [BUS_DATA_WIDTH-1:0]        in_AXI_WDATA,
    input  wire [BUS_DATA_WIDTH/8-1:0]      in_AXI_WSTRB,
    input  wire                             in_AXI_WVALID,
    output wire                             out_AXI_WREADY,
    output wire [NUM_WRITE_PORTS-1:0]       out_AXI_BVALID,
    input  wire [NUM_WRITE_PORTS-1:0]       in_AXI_BREADY,
    // write throttling controls (only for conservative mode)
    output wire [7:0]                       out_BURST_AWLEN,
    output wire [NUM_WRITE_PORTS-1:0]       out_BURST_AWVALID,
    input  wire [C_M_AXI_ID_WIDTH-1:0]      in_BURST_WID,
    input  wire [7:0]                       in_BURST_WLEN,
    input  wire                             in_BURST_WVALID,
    output wire [NUM_WRITE_PORTS-1:0]       out_BURST_WREADY
);

//------------------------Parameter----------------------
    // Bytes per bus beat, and its log2 (driven out as AWSIZE).
    localparam BUS_DATA_BYTES = BUS_DATA_WIDTH / 8;
    localparam BUS_ADDR_ALIGN = log2(BUS_DATA_BYTES);

//------------------------Task and function--------------
    // Smallest n such that 2**n >= x (0 for x <= 1).
    function integer log2;
        input integer x;
        integer n, m;
    begin
        n = 0;
        for (m = 1; m < x; m = m * 2)
            n = n + 1;
        log2 = n;
    end
    endfunction

    // Per-port vector that is all zero except bit `idx`, which carries `valid`.
    function [NUM_WRITE_PORTS-1:0] bit_set;
        input [C_M_AXI_ID_WIDTH-1:0] idx;
        input                        valid;
    begin
        bit_set      = {NUM_WRITE_PORTS{1'b0}};
        bit_set[idx] = valid;
    end
    endfunction

    // Outstanding-request depth configured for local port `idx`
    // (0 for any index other than 0).
    function integer num_outstanding_val;
        input integer idx;
    begin
        if (idx == 0)
            num_outstanding_val = ID0_NUM_WRITE_OUTSTANDING;
        else
            num_outstanding_val = 0;
    end
    endfunction

    // Convert the actual AXI ID to the ID locally used by the write module.
    function [C_M_AXI_ID_WIDTH-1:0] compress_axi_id;
        input [C_M_AXI_ID_WIDTH-1:0] axi_id;
    begin
        case (axi_id)
            'd0 : compress_axi_id = 'd0;
            default : compress_axi_id = 'd0;
        endcase
    end
    endfunction

    // Convert the ID locally used by the write module to the actual AXI ID.
    function [C_M_AXI_ID_WIDTH-1:0] decompress_axi_id;
        input [C_M_AXI_ID_WIDTH-1:0] local_id;
    begin
        case (local_id)
            'd0 : decompress_axi_id = 'd0;
            default : decompress_axi_id = 'd0;
        endcase
    end
    endfunction

//------------------------Local signal-------------------
    genvar idx;

    // locally numbered IDs on the bus side
    wire [C_M_AXI_ID_WIDTH-1:0] local_BUS_AWID;
    wire [C_M_AXI_ID_WIDTH-1:0] local_BUS_WID;
    wire [C_M_AXI_ID_WIDTH-1:0] local_BUS_BID;

    // burst converter -> throttle
    wire [C_M_AXI_ID_WIDTH-1:0] local_BURST_AWID;
    wire [BUS_ADDR_WIDTH - 1:0] local_BURST_AWADDR;
    wire [7:0]                  local_BURST_AWLEN;
    wire                        local_BURST_AWVALID;
    wire                        local_BURST_AWREADY;
    wire                        local_BURST_WREADY;

    // burst converter -> per-port outstanding records
    wire [C_M_AXI_ID_WIDTH-1:0] ost_ctrl_id;
    wire                        ost_ctrl_info;
    wire                        ost_ctrl_valid;
    wire [NUM_WRITE_PORTS-1:0]  ost_ctrl_ready;
    wire [NUM_WRITE_PORTS-1:0]  ost_ctrl_write;

    // registered bus response
    wire [C_M_AXI_ID_WIDTH-1:0] resp_id;
    wire                        resp_valid;
    wire                        resp_ready;
    wire                        next_resp;

    // per-port outstanding records, read side
    wire [NUM_WRITE_PORTS-1:0]  ost_resp_valid;
    wire [NUM_WRITE_PORTS-1:0]  ost_resp_info;
    wire [NUM_WRITE_PORTS-1:0]  ost_resp_read;

// -----------------------BUS global config -------------
    // IDs are translated back to their bus numbering on the way out.
    assign out_BUS_AWID        = decompress_axi_id(local_BUS_AWID);
    assign out_BUS_WID         = decompress_axi_id(local_BUS_WID);
    // Fixed AW attributes.
    assign out_BUS_AWSIZE      = BUS_ADDR_ALIGN;
    assign out_BUS_AWBURST     = 2'b01;
    assign out_BUS_AWLOCK      = 2'b00;
    assign out_BUS_AWQOS       = 4'b0000;
    assign out_BUS_AWREGION    = 4'b0000;
    // Parameterised attributes.
    assign out_BUS_AWCACHE     = C_CACHE_VALUE;
    assign out_BUS_AWPROT      = C_PROT_VALUE;
    assign out_BUS_AWUSER      = C_USER_VALUE;
    assign out_BUS_WUSER       = C_USER_VALUE;

//------------------------AW channel begin---------------
    s2mm_gmem_m_axi_burst_converter #(
        .ID_WIDTH              (C_M_AXI_ID_WIDTH),
        .ADDR_WIDTH            (BUS_ADDR_WIDTH),
        .DATA_WIDTH            (BUS_DATA_WIDTH),
        .LEN_WIDTH             (USER_LEN_WIDTH),
        .MAX_BURST_LEN         (MAX_WRITE_BURST_LENGTH),
        .NUM_PORTS             (NUM_WRITE_PORTS)
    ) wreq_burst_conv (
        .clk                   (ACLK),
        .clk_en                (ACLK_EN),
        .reset                 (ARESET),
        // user request in
        .in_REQ_VALID          (in_AXI_AWVALID),
        .out_REQ_READY         (out_AXI_AWREADY),
        .in_REQ_ID             (in_AXI_AWID),
        .in_REQ_ADDR           (in_AXI_AWADDR),
        .in_REQ_LEN            (in_AXI_AWLEN),
        // burst out
        .out_BURST_VALID       (local_BURST_AWVALID),
        .in_BURST_READY        (local_BURST_AWREADY),
        .out_BURST_ID          (local_BURST_AWID),
        .out_BURST_ADDR        (local_BURST_AWADDR),
        .out_BURST_LEN         (local_BURST_AWLEN),
        // outstanding-record control
        .out_CTRL_VALID        (ost_ctrl_valid),
        .in_CTRL_READY         (ost_ctrl_ready),
        .out_CTRL_ID           (ost_ctrl_id),
        .out_CTRL_INFO         (ost_ctrl_info)
    );

    // Burst information mirrored to the module outputs, one-hot per port.
    assign out_BURST_AWVALID   = bit_set(local_BURST_AWID, local_BURST_AWVALID);
    assign out_BURST_AWLEN     = local_BURST_AWLEN;
    assign out_BURST_WREADY    = {NUM_WRITE_PORTS{local_BURST_WREADY}};
//------------------------AW channel end-----------------

//------------------------W channel begin----------------
    s2mm_gmem_m_axi_throttle #(
        .CONSERVATIVE          (CONSERVATIVE),
        .ID_WIDTH              (C_M_AXI_ID_WIDTH),
        .ADDR_WIDTH            (BUS_ADDR_WIDTH),
        .DATA_WIDTH            (BUS_DATA_WIDTH),
        .NUM_PORTS             (NUM_WRITE_PORTS),
        .NUM_OUTSTANDING       (NUM_WRITE_OUTSTANDING),
        .ID0_NUM_OUTSTANDING   (ID0_NUM_WRITE_OUTSTANDING)
    ) wreq_throttle (
        .clk                   (ACLK),
        .clk_en                (ACLK_EN),
        .reset                 (ARESET),
        // burst address in
        .in_BURST_AWVALID      (local_BURST_AWVALID),
        .out_BURST_AWREADY     (local_BURST_AWREADY),
        .in_BURST_AWID         (local_BURST_AWID),
        .in_BURST_AWADDR       (local_BURST_AWADDR),
        .in_BURST_AWLEN        (local_BURST_AWLEN),
        // burst data control, only for conservative mode
        .in_BURST_WVALID       (in_BURST_WVALID),
        .out_BURST_WREADY      (local_BURST_WREADY),
        .in_BURST_WID          (in_BURST_WID),
        .in_BURST_WLEN         (in_BURST_WLEN),
        // write data in
        .in_AXI_WVALID         (in_AXI_WVALID),
        .out_AXI_WREADY        (out_AXI_WREADY),
        .out_AXI_WID           (out_AXI_WID),
        .in_AXI_WDATA          (in_AXI_WDATA),
        .in_AXI_WSTRB          (in_AXI_WSTRB),
        // AXI bus, AW
        .out_BUS_AWVALID       (out_BUS_AWVALID),
        .in_BUS_AWREADY        (in_BUS_AWREADY),
        .out_BUS_AWID          (local_BUS_AWID),
        .out_BUS_AWADDR        (out_BUS_AWADDR),
        .out_BUS_AWLEN         (out_BUS_AWLEN),
        // AXI bus, W
        .out_BUS_WVALID        (out_BUS_WVALID),
        .in_BUS_WREADY         (in_BUS_WREADY),
        .out_BUS_WID           (local_BUS_WID),
        .out_BUS_WDATA         (out_BUS_WDATA),
        .out_BUS_WSTRB         (out_BUS_WSTRB),
        .out_BUS_WLAST         (out_BUS_WLAST)
    );
//------------------------W channel end------------------

//------------------------B channel begin----------------
    // Incoming response IDs are translated to local numbering first.
    assign local_BUS_BID  = compress_axi_id(in_BUS_BID);

    // Register stage on the response ID / valid.
    s2mm_gmem_m_axi_reg_slice #(
        .DATA_WIDTH            (C_M_AXI_ID_WIDTH)
    ) rs_resp (
        .clk                   (ACLK),
        .reset                 (ARESET),
        .s_valid               (in_BUS_BVALID),
        .s_ready               (out_BUS_BREADY),
        .s_data                (local_BUS_BID),
        .m_valid               (resp_valid),
        .m_ready               (resp_ready),
        .m_data                (resp_id)
    );

    // One 1-bit record FIFO per port: written by the burst converter, read
    // when the matching bus response is consumed.
    generate
    for (idx = 0; idx < NUM_WRITE_PORTS; idx = idx + 1) begin : fifo_resp_gen
        s2mm_gmem_m_axi_fifo #(
            .DEPTH             (num_outstanding_val(idx)),
            .ADDR_WIDTH        (log2(num_outstanding_val(idx))),
            .DATA_WIDTH        (1)
        ) fifo_resp (
            .clk               (ACLK),
            .clk_en            (ACLK_EN),
            .reset             (ARESET),
            .if_write          (ost_ctrl_write[idx]),
            .if_din            (ost_ctrl_info),
            .if_full_n         (ost_ctrl_ready[idx]),
            .if_read           (ost_resp_read[idx]),
            .if_dout           (ost_resp_info[idx]),
            .if_empty_n        (ost_resp_valid[idx]),
            .if_num_data_valid ()
        );
    end
    endgenerate

    // Route record writes / reads to the addressed port only.
    assign ost_ctrl_write = bit_set(ost_ctrl_id, ost_ctrl_valid);
    assign ost_resp_read  = bit_set(resp_id, next_resp);

    // A response is consumed when its port has a record and either that
    // record's info bit is 0 or the port is ready to take the response.
    assign resp_ready = (ost_resp_valid[resp_id] == 1'b1) &&
                        ((ost_resp_info[resp_id] == 1'b0) || (in_AXI_BREADY[resp_id] == 1'b1));
    assign next_resp  = resp_valid && resp_ready;

    // Only records whose info bit is set raise BVALID towards their port.
    assign out_AXI_BVALID = bit_set(resp_id, resp_valid) & ost_resp_info;
//------------------------B channel end------------------
endmodule


// Burst converter wrapper. Instantiates the interleaving implementation when
// INTERLEAVE == 1 and there is more than one port, otherwise the sequential
// one. Both variants receive the same parameters and connections.
module s2mm_gmem_m_axi_burst_converter
#(parameter
    INTERLEAVE                   = 1,
    ID_WIDTH                     = 1,
    DATA_WIDTH                   = 32,
    ADDR_WIDTH                   = 32,
    LEN_WIDTH                    = 32,
    NUM_PORTS                    = 1,
    MAX_BURST_LEN                = 16
)(
    input  wire                  clk,
    input  wire                  reset,
    input  wire                  clk_en,

    input  wire [ID_WIDTH-1:0]   in_REQ_ID,
    input  wire [ADDR_WIDTH-1:0] in_REQ_ADDR,
    input  wire [LEN_WIDTH-1:0]  in_REQ_LEN,
    input  wire                  in_REQ_VALID,
    output wire [NUM_PORTS-1:0]  out_REQ_READY,
    output wire [ID_WIDTH-1:0]   out_BURST_ID,
    output wire [ADDR_WIDTH-1:0] out_BURST_ADDR,
    output wire [7:0]            out_BURST_LEN,
    output wire                  out_BURST_VALID,
    input  wire                  in_BURST_READY,
    output wire [ID_WIDTH-1:0]   out_CTRL_ID,
    output wire                  out_CTRL_INFO,
    output wire                  out_CTRL_VALID,
    input  wire [NUM_PORTS-1:0]  in_CTRL_READY
);

    generate
    if ((INTERLEAVE == 1) && (NUM_PORTS > 1)) begin
        // multi-port, interleaved variant
        s2mm_gmem_m_axi_burst_interleave #(
            .ID_WIDTH          (ID_WIDTH),
            .ADDR_WIDTH        (ADDR_WIDTH),
            .DATA_WIDTH        (DATA_WIDTH),
            .LEN_WIDTH         (LEN_WIDTH),
            .MAX_BURST_LEN     (MAX_BURST_LEN),
            .NUM_PORTS         (NUM_PORTS)
        ) burst_interleave (
            .clk               (clk),
            .clk_en            (clk_en),
            .reset             (reset),
            // request in
            .in_REQ_VALID      (in_REQ_VALID),
            .out_REQ_READY     (out_REQ_READY),
            .in_REQ_ID         (in_REQ_ID),
            .in_REQ_ADDR       (in_REQ_ADDR),
            .in_REQ_LEN        (in_REQ_LEN),
            // burst out
            .out_BURST_VALID   (out_BURST_VALID),
            .in_BURST_READY    (in_BURST_READY),
            .out_BURST_ID      (out_BURST_ID),
            .out_BURST_ADDR    (out_BURST_ADDR),
            .out_BURST_LEN     (out_BURST_LEN),
            // control out
            .out_CTRL_VALID    (out_CTRL_VALID),
            .in_CTRL_READY     (in_CTRL_READY),
            .out_CTRL_ID       (out_CTRL_ID),
            .out_CTRL_INFO     (out_CTRL_INFO)
        );

    end
    else begin
        // single-port or non-interleaved variant
        s2mm_gmem_m_axi_burst_sequential #(
            .ID_WIDTH          (ID_WIDTH),
            .ADDR_WIDTH        (ADDR_WIDTH),
            .DATA_WIDTH        (DATA_WIDTH),
            .LEN_WIDTH         (LEN_WIDTH),
            .MAX_BURST_LEN     (MAX_BURST_LEN),
            .NUM_PORTS         (NUM_PORTS)
        ) burst_sequential (
            .clk               (clk),
            .clk_en            (clk_en),
            .reset             (reset),
            // request in
            .in_REQ_VALID      (in_REQ_VALID),
            .out_REQ_READY     (out_REQ_READY),
            .in_REQ_ID         (in_REQ_ID),
            .in_REQ_ADDR       (in_REQ_ADDR),
            .in_REQ_LEN        (in_REQ_LEN),
            // burst out
            .out_BURST_VALID   (out_BURST_VALID),
            .in_BURST_READY    (in_BURST_READY),
            .out_BURST_ID      (out_BURST_ID),
            .out_BURST_ADDR    (out_BURST_ADDR),
            .out_BURST_LEN     (out_BURST_LEN),
            // control out
            .out_CTRL_VALID    (out_CTRL_VALID),
            .in_CTRL_READY     (in_CTRL_READY),
            .out_CTRL_ID       (out_CTRL_ID),
            .out_CTRL_INFO     (out_CTRL_INFO)
        );

    end
    endgenerate

endmodule


module s2mm_gmem_m_axi_burst_interleave
#(parameter
    ID_WIDTH                     = 1,
    DATA_WIDTH                   = 32,
    ADDR_WIDTH                   = 32,
    LEN_WIDTH                    = 32,
    NUM_PORTS                    = 1,
    MAX_BURST_LEN                = 16
)(
    input  wire                  clk,
    input  wire                  reset,
    input  wire                  clk_en,
    input  wire [ID_WIDTH-1:0]   in_REQ_ID,
    input  wire [ADDR_WIDTH-1:0] in_REQ_ADDR,
    input  wire [LEN_WIDTH-1:0]  in_REQ_LEN,
    input  wire                  in_REQ_VALID,
    output wire [NUM_PORTS-1:0]  out_REQ_READY,
    output wire [ID_WIDTH-1:0]   out_BURST_ID,
    output wire [ADDR_WIDTH-1:0] out_BURST_ADDR,
    output wire [7:0]            out_BURST_LEN,
    output wire                  out_BURST_VALID,
    input  wire                  in_BURST_READY,
    output wire [ID_WIDTH-1:0]   out_CTRL_ID,
    output wire                  out_CTRL_INFO,
    output wire                  out_CTRL_VALID,
    input  wire [NUM_PORTS-1:0]  in_CTRL_READY
);
//------------------------Parameter----------------------
    localparam
        // width of one packed request: {ID, LEN, ADDR}
        PACK_WIDTH      = ID_WIDTH+ADDR_WIDTH+LEN_WIDTH,
        // bytes per data beat and the matching address alignment (log2)
        DATA_BYTES      = DATA_WIDTH / 8,
        ADDR_ALIGN      = log2(DATA_BYTES),
        // all-ones value of width (12 - ADDR_ALIGN); presumably the beat-index
        // mask within a 4 KB region - confirm against its use later in the module
        BOUNDARY_BEATS  = {12-ADDR_ALIGN{1'b1}},
        // log2 of the maximum burst length
        NUM_BEAT_WIDTH  = log2(MAX_BURST_LEN);
//------------------------Task and function--------------
    // Smallest n such that 2**n >= x; returns 0 for x <= 1.
    function integer log2;
        input integer x;
        integer n, m;
        begin
            n = 0;
            // double m until it reaches x, counting the doublings
            for (m = 1; m < x; m = m * 2)
                n = n + 1;
            log2 = n;
        end
    endfunction
//------------------------Local signal-------------------
    wire [PACK_WIDTH-1:0]       req_pack_in;
    wire [PACK_WIDTH-1:0]       req_pack_out;
    wire [ID_WIDTH-1:0]         req_id_tmp;
    wire [ADDR_WIDTH-1:0]       req_addr_tmp;
    wire [LEN_WIDTH-1:0]        req_len_tmp;

    wire                        req_full_n;
    wire                        req_empty_n;
    wire                        write_req;
    wire                        read_req;
    reg  [NUM_PORTS-1:0]        req_ready;
    wire                        next_req;

    reg  [ADDR_WIDTH - 1:0]     start_addr;
    wire [ADDR_WIDTH - 1:0]     sect_addr;
    reg  [ADDR_WIDTH - 1:0]     sect_addr_buf;
    reg  [ID_WIDTH-1:0]         req_id;
    reg  [ID_WIDTH-1:0]         req_id_buf;
    reg                         req_handling;

    reg  [11 - ADDR_ALIGN:0]    start_to_4k;
    reg  [11 - ADDR_ALIGN:0]    end_from_4k;
    wire [11 - ADDR_ALIGN:0]    sect_len;
    reg  [11 - ADDR_ALIGN:0]    sect_len_buf;
    reg  [LEN_WIDTH-1:0]        beat_len;
    reg  [LEN_WIDTH-1:0]        beat_len_buf;

    reg  [ADDR_WIDTH-13:0]      sect_cnt;
    reg  [LEN_WIDTH-13:0]       sect_total;
    reg  [LEN_WIDTH-13:0]       sect_total_buf;
    wire [LEN_WIDTH-13:0]       sect_total_tmp;
    wire                        ready_for_sect;

    wire                        single_sect;
    reg                         first_sect;
    reg                         last_sect;
    wire                        last_sect_tmp;
    reg                         last_sect_buf;
    wire                        next_sect;

    reg                         burst_valid;

    wire [ID_WIDTH-1:0]         ost_ctrl_id;
    wire                        ost_ctrl_info;
    wire                        ost_ctrl_valid;
    wire                        ost_ctrl_ready;

    wire [PACK_WIDTH-1:0]       rem_req_pack;
    reg                         rem_req_valid;
    reg  [ID_WIDTH-1:0]         rem_req_id;
    reg  [ADDR_WIDTH-1:0]       rem_req_addr;
    reg  [LEN_WIDTH-1:0]        rem_req_len;
    wire [ADDR_WIDTH-1:0]       rem_req_addr_pred;
    wire [LEN_WIDTH-1:0]        rem_req_len_pred;

//------------------------Instantiation------------------
    generate 
    if (NUM_PORTS > 2) begin
        s2mm_gmem_m_axi_fifo #(
            .DATA_WIDTH        (PACK_WIDTH),
            .ADDR_WIDTH        (log2(NUM_PORTS)),
            .DEPTH             (NUM_PORTS)
        ) req_buffer (
            .clk               (clk),
            .reset             (reset),
            .clk_en            (clk_en),
            .if_full_n         (req_full_n),
            .if_write          (write_req),
            .if_din            (req_pack_in),
            .if_empty_n        (req_empty_n),
            .if_read           (read_req),
            .if_dout           (req_pack_out),
            .if_num_data_valid ());
    end
    else begin
        s2mm_gmem_m_axi_reg_slice #(
            .DATA_WIDTH     (PACK_WIDTH)
        ) rs_req (
            .clk            (clk),
            .reset          (reset),
            .s_ready        (req_full_n),
            .s_valid        (write_req),
            .s_data         (req_pack_in),
            .m_valid        (req_empty_n),
            .m_ready        (read_req),
            .m_data         (req_pack_out));
    end
    endgenerate
        
//------------------------Body--------------------------- 
    assign out_REQ_READY = (req_full_n && ~rem_req_valid) ? req_ready : {NUM_PORTS{1'b0}};
    assign req_pack_in   = rem_req_valid ? rem_req_pack  : {in_REQ_ID, in_REQ_LEN, in_REQ_ADDR};
    assign write_req     = rem_req_valid || in_REQ_VALID;
    
    // Per-ID request-ready flags (one bit per entry of req_ready).
    //   * reset: every bit is set.
    //   * in_REQ_VALID while req_full_n is high and no remainder request is
    //     pending (~rem_req_valid): the bit selected by in_REQ_ID is cleared.
    //   * ost_ctrl_valid together with ost_ctrl_info: the bit selected by
    //     ost_ctrl_id is set again.
    // All updates are non-blocking so that req_ready, and out_REQ_READY which
    // is continuously assigned from it, only change after the clock edge has
    // been sampled everywhere; blocking assignments here would race with any
    // flop that samples out_REQ_READY on the same edge.
    // If both events address the same bit in one cycle, the later statement
    // (the set) takes effect, exactly as statement order implies.
    always @(posedge clk)
    begin
        if (reset)
            req_ready <= {NUM_PORTS{1'b1}};
        else if (clk_en) begin
            if (in_REQ_VALID && req_full_n && ~rem_req_valid)
                req_ready[in_REQ_ID] <= 1'b0;
            if (ost_ctrl_info && ost_ctrl_valid)
                req_ready[ost_ctrl_id] <= 1'b1;
        end
    end

    assign req_id_tmp    = req_pack_out[PACK_WIDTH-1 : ADDR_WIDTH+LEN_WIDTH];
    assign req_len_tmp   = req_pack_out[ADDR_WIDTH+LEN_WIDTH-1 : ADDR_WIDTH];
    assign req_addr_tmp  = req_pack_out[ADDR_WIDTH-1 : 0];

    assign next_req      = read_req && req_empty_n;

    always @(posedge clk)
    begin
        if (reset) begin
            req_id      <= 0;
            start_addr  <= 0;
            sect_total  <= 0;
            end_from_4k <= 0;
            start_to_4k <= 0;
        end
        else if (clk_en) begin
            if(next_req) begin
                req_id      <= req_id_tmp;
                start_addr  <= {req_addr_tmp[ADDR_WIDTH-1:ADDR_ALIGN], {ADDR_ALIGN{1'b0}}}; // addr align
                sect_total  <= (req_len_tmp + req_addr_tmp[11:0]) >> 12;
                end_from_4k <= (req_addr_tmp[11:0] + req_len_tmp[11:0]) >> ADDR_ALIGN;
                start_to_4k <= BOUNDARY_BEATS - req_addr_tmp[11:ADDR_ALIGN];
            end
        end
    end

    always @(posedge clk)
    begin
        if (reset)
            req_handling <= 1'b0;
        else if (clk_en) begin
            if (next_req)
                req_handling <= 1'b1;
            else if (~req_empty_n && last_sect_tmp && next_sect)
                req_handling <= 1'b0;
        end
    end

    // 4k boundary
    assign last_sect_tmp  = single_sect || last_sect;

    assign sect_total_tmp = first_sect ? sect_total : sect_total_buf;

    assign single_sect  = (sect_total == 0);

    assign sect_addr  = (first_sect)? start_addr : {sect_cnt, {12{1'b0}}};
    assign sect_len   = single_sect                ? beat_len[11-ADDR_ALIGN:0] :
                        ( first_sect && ~last_sect)? start_to_4k :
                        (~first_sect &&  last_sect)? end_from_4k :
                                                     BOUNDARY_BEATS;
   always @(posedge clk)
    begin
        if (reset) begin
            first_sect <= 1'b0;
            last_sect <= 1'b0;
            sect_cnt <= 0;
            beat_len <= 0;
        end
        else if (clk_en) begin
            if (next_req) begin
                first_sect <= 1'b1;
                last_sect <= 1'b0;
                sect_cnt <= req_addr_tmp[ADDR_WIDTH-1:12];
                beat_len <= (req_len_tmp + req_addr_tmp[ADDR_ALIGN-1:0]) >> ADDR_ALIGN;  // beat align
            end
            else if (next_sect) begin
                first_sect <= 1'b0;
                last_sect <= (sect_total_tmp == 1);
                sect_cnt <= sect_cnt + 1;
                beat_len <= beat_len - sect_len - 1;
            end
        end
    end

    always @(posedge clk)
    begin
        if (reset) begin
            req_id_buf     <= 0;
            sect_addr_buf  <= 0;
            sect_len_buf   <= 0;
            last_sect_buf  <= 1'b0;
            beat_len_buf   <= 0;
            sect_total_buf <= 0;
        end
        else if (clk_en) begin
            if (next_sect) begin
                req_id_buf     <= req_id;
                sect_addr_buf  <= sect_addr;
                sect_len_buf   <= sect_len;
                last_sect_buf  <= last_sect_tmp;
                beat_len_buf   <= beat_len;
                sect_total_buf <= sect_total_tmp - 1;
            end
        end
    end

    assign out_CTRL_VALID     = ost_ctrl_valid;
    assign out_CTRL_INFO      = ost_ctrl_info;
    assign out_CTRL_ID        = ost_ctrl_id;

    generate
    if (DATA_BYTES >= 4096/MAX_BURST_LEN) begin : must_one_burst
        wire                      read_sect;

        assign out_BURST_ADDR     = sect_addr_buf;
        assign out_BURST_LEN      = sect_len_buf;
        assign out_BURST_VALID    = burst_valid;
        assign out_BURST_ID       = req_id_buf;

        assign ost_ctrl_valid     = next_sect;
        assign ost_ctrl_info      = last_sect_tmp;
        assign ost_ctrl_id        = req_id;
        assign ost_ctrl_ready     = in_CTRL_READY[req_id];

        assign next_sect          = read_sect && ost_ctrl_ready;
        assign ready_for_sect     = ~(burst_valid && ~in_BURST_READY) && req_full_n && |in_CTRL_READY;
        assign read_sect          = req_handling & ready_for_sect;
        assign read_req           = ~req_handling || ready_for_sect;

        always @(posedge clk)
        begin
            if (reset)
                burst_valid <= 1'b0;
            else if (clk_en) begin
                if (next_sect)
                    burst_valid <= 1'b1;
                else if (in_BURST_READY)
                    burst_valid <= 1'b0;
            end
        end

        // calculate remaining request, for interleaved burst handling.
        assign rem_req_pack      = {rem_req_id, {rem_req_len[LEN_WIDTH-ADDR_ALIGN-1:0], {ADDR_ALIGN{1'b1}}}, rem_req_addr};
        assign rem_req_addr_pred = ost_ctrl_ready ? {sect_cnt+1, {12{1'b0}}}  : sect_addr;
        assign rem_req_len_pred  = ost_ctrl_ready ? (beat_len - sect_len - 1) : beat_len;

        always @(posedge clk)
        begin
            if (reset) begin
                rem_req_id   <= 0;
                rem_req_addr <= 0;
                rem_req_len <= 0;
            end
            else if (clk_en) begin
                if (read_sect) begin
                    rem_req_id   <= req_id;
                    rem_req_addr <= rem_req_addr_pred;
                    rem_req_len  <= rem_req_len_pred;
                end
            end
        end

        always @(posedge clk)
        begin
            if (reset)
                rem_req_valid <= 1'b0;
            else if (clk_en) begin
                if (next_sect && last_sect_tmp)
                    rem_req_valid <= 1'b0;
                else if (req_empty_n && read_sect)
                    rem_req_valid <= 1'b1;
                else if (req_full_n)
                    rem_req_valid <= 1'b0;
            end
        end
    end
    else begin : could_multi_bursts
        reg  [ID_WIDTH-1:0]                       burst_id;
        reg  [ADDR_WIDTH - 1:0]                   burst_addr;
        wire [ADDR_WIDTH - 1:0]                   burst_addr_next;
        reg  [ADDR_WIDTH - 1:0]                   burst_addr_pred;
        reg  [7:0]                                burst_len;
        reg  [7:0]                                burst_len_pred;
        reg  [8:0]                                burst_len_pred_plus1;

        reg                                       next_req_ready;
        reg                                       sect_handling;
        reg  [11 - NUM_BEAT_WIDTH - ADDR_ALIGN:0] loop_cnt;
        reg                                       first_loop;
        reg                                       last_loop;
        wire                                      next_loop;
        wire                                      read_loop;
        wire                                      ready_for_loop;

        wire                                      last_loop_when_next_loop;
        wire                                      last_loop_when_next_sect;
        wire [7:0]                                burst_len_when_next_loop;
        wire [7:0]                                burst_len_when_next_sect;

        assign out_BURST_ADDR  = burst_addr;
        assign out_BURST_LEN   = burst_len;
        assign out_BURST_VALID = burst_valid;
        assign out_BURST_ID    = burst_id;

        assign ost_ctrl_valid  = next_loop;
        assign ost_ctrl_info   = last_loop && last_sect_buf;
        assign ost_ctrl_id     = req_id_buf;
        assign ost_ctrl_ready  = in_CTRL_READY[req_id_buf];

        assign ready_for_loop  = ~(burst_valid && ~in_BURST_READY) && req_full_n && |in_CTRL_READY; 
        assign read_loop       = sect_handling && ready_for_loop;
        assign next_loop       = read_loop & ost_ctrl_ready;

        assign next_sect       = req_handling & ready_for_sect;
        assign ready_for_sect  = ~sect_handling  || (read_loop && next_req_ready) || (next_loop && last_loop);
        assign read_req        = ~next_req_ready || ready_for_sect;

        always @(posedge clk)
        begin
            if (reset)
                burst_valid <= 1'b0;
            else if (clk_en) begin
                if (next_loop)
                    burst_valid <= 1'b1;
                else if (in_BURST_READY)
                    burst_valid <= 1'b0;
            end
        end

        always @(posedge clk)
        begin
            if (reset)
                sect_handling <= 1'b0;
            else if (clk_en) begin
                if (req_handling && ~sect_handling)
                    sect_handling <= 1'b1;
                else if (~req_handling && last_loop && next_loop)
                    sect_handling <= 1'b0;
            end
        end

        always @(posedge clk)
        begin
            if (reset) begin
                first_loop <= 1'b0;
                last_loop  <= 1'b0;
                loop_cnt   <= 0;
            end
            else if (clk_en) begin
                if (next_sect) begin
                    first_loop <= 1'b1;
                    last_loop  <= last_loop_when_next_sect;
                    loop_cnt   <= sect_len[11 - ADDR_ALIGN : NUM_BEAT_WIDTH];
                end
                else if (next_loop) begin
                    first_loop <= 1'b0;
                    last_loop  <= last_loop_when_next_loop;
                    loop_cnt   <= loop_cnt - 1;
                end
            end
        end

        assign last_loop_when_next_sect = (sect_len[11 - ADDR_ALIGN : NUM_BEAT_WIDTH] == 0); 
        assign last_loop_when_next_loop = (loop_cnt == 1);

        assign burst_addr_next = first_loop ? sect_addr_buf  : burst_addr_pred;
        assign burst_len_when_next_sect = (NUM_BEAT_WIDTH == 0)    ? 0 :
                                          last_loop_when_next_sect ? sect_len[NUM_BEAT_WIDTH - 1:0] :
                                                                     {NUM_BEAT_WIDTH{1'b1} };
        assign burst_len_when_next_loop = (NUM_BEAT_WIDTH == 0)    ? 0 :
                                          last_loop_when_next_loop ? sect_len_buf[NUM_BEAT_WIDTH - 1:0] :
                                                                     {NUM_BEAT_WIDTH{1'b1} };

        assign rem_req_pack = {rem_req_id, {rem_req_len[LEN_WIDTH-ADDR_ALIGN-1:0], {ADDR_ALIGN{1'b1}}}, rem_req_addr};

        assign rem_req_addr_pred = ost_ctrl_ready &  first_loop ? sect_addr_buf   + (burst_len_pred_plus1 << ADDR_ALIGN) :
                                   ost_ctrl_ready & ~first_loop ? burst_addr_pred + (burst_len_pred_plus1 << ADDR_ALIGN) :
                                  ~ost_ctrl_ready &  first_loop ? sect_addr_buf : 
                                                                  burst_addr_pred;

        assign rem_req_len_pred  = ost_ctrl_ready &  first_loop ? beat_len_buf - burst_len_pred_plus1 :
                                   ost_ctrl_ready & ~first_loop ? rem_req_len  - burst_len_pred_plus1 :
                                  ~ost_ctrl_ready &  first_loop ? beat_len_buf :
                                                                  rem_req_len;
    
        always @(posedge clk)
        begin
            if (reset) begin
                burst_len_pred       <= 2**NUM_BEAT_WIDTH - 1;
                burst_len_pred_plus1 <= 2**NUM_BEAT_WIDTH;
            end
            else if (clk_en) begin
                if (next_sect) begin
                    burst_len_pred       <= burst_len_when_next_sect;
                    burst_len_pred_plus1 <= burst_len_when_next_sect + 1;
                end
                else if (next_loop) begin
                    burst_len_pred       <= burst_len_when_next_loop;
                    burst_len_pred_plus1 <= burst_len_when_next_loop + 1;
                end
            end
        end

        always @(posedge clk)
        begin
            if (reset) begin
                burst_id    <= 0;
                burst_addr  <= 0;
                burst_len   <= 0;
                burst_addr_pred <= 0;
            end
            else if (clk_en) begin
                if (next_loop) begin
                    burst_id    <= req_id_buf;
                    burst_addr  <= burst_addr_next;
                    burst_len   <= burst_len_pred;
                    burst_addr_pred <= burst_addr_next + (burst_len_pred_plus1 << ADDR_ALIGN);
                end
            end
        end

        always @(posedge clk)
        begin
            if (reset) begin
                rem_req_id   <= 0;
                rem_req_addr <= 0;
                rem_req_len  <= 0;
            end
            else if (clk_en) begin
                if (read_loop) begin
                    rem_req_id   <= req_id_buf;
                    rem_req_addr <= rem_req_addr_pred;
                    rem_req_len  <= rem_req_len_pred;
                end
            end
        end

        always @(posedge clk)
        begin
            if (reset)
                next_req_ready <= 1'b0;
            else if (clk_en) begin
                if (next_req)
                    next_req_ready <= 1'b1;
                else if (next_sect)
                    next_req_ready <= 1'b0;
            end
        end

        always @(posedge clk)
        begin
            if (reset)
                rem_req_valid <= 1'b0;
            else if (clk_en) begin
                if (next_loop && last_loop && last_sect_buf)
                    rem_req_valid <= 1'b0;
                else if (next_req_ready && read_loop)
                    rem_req_valid <= 1'b1;
                else if (req_full_n)
                    rem_req_valid <= 1'b0;
            end
        end
    end
    endgenerate

endmodule

// Sequential burst generator.
//
// Requests {in_REQ_ID, in_REQ_LEN, in_REQ_ADDR} enter through a register
// slice and are handled one at a time. Each request is cut into "sections"
// that never cross a 4096-byte address boundary. An elaboration-time test,
// DATA_BYTES >= 4096/MAX_BURST_LEN, then selects one of two back ends:
//   * must_one_burst     - every section is emitted as a single burst;
//   * could_multi_bursts - a section is further divided into bursts ("loops")
//                          whose length field is at most 2**NUM_BEAT_WIDTH - 1.
// Every emitted burst is also announced on the out_CTRL_* interface, and a
// burst is only emitted when in_CTRL_READY is high for the request's ID.
//
// NOTE(review): the arithmetic treats in_REQ_LEN as a byte-granular quantity
// that is added to the byte offset of in_REQ_ADDR, presumably
// (byte count - 1) - confirm against the module that drives this port.
module s2mm_gmem_m_axi_burst_sequential
#(parameter
    ID_WIDTH                     = 1,   // width of request / burst / ctrl ID fields
    DATA_WIDTH                   = 32,  // data width in bits; DATA_BYTES = DATA_WIDTH / 8
    ADDR_WIDTH                   = 32,  // width of request and burst addresses
    LEN_WIDTH                    = 32,  // width of in_REQ_LEN
    NUM_PORTS                    = 1,   // width of out_REQ_READY and in_CTRL_READY
    MAX_BURST_LEN                = 16   // sets NUM_BEAT_WIDTH and selects the generate branch
)(
    input  wire                  clk,
    input  wire                  reset,            // synchronous, active high
    input  wire                  clk_en,           // gates every register update in this module
    // request input (written into the rs_req register slice)
    input  wire [ID_WIDTH-1:0]   in_REQ_ID,
    input  wire [ADDR_WIDTH-1:0] in_REQ_ADDR,
    input  wire [LEN_WIDTH-1:0]  in_REQ_LEN,
    input  wire                  in_REQ_VALID,
    output wire [NUM_PORTS-1:0]  out_REQ_READY,    // every bit mirrors the slice's s_ready
    // burst output
    output wire [ID_WIDTH-1:0]   out_BURST_ID,
    output wire [ADDR_WIDTH-1:0] out_BURST_ADDR,
    output wire [7:0]            out_BURST_LEN,
    output wire                  out_BURST_VALID,
    input  wire                  in_BURST_READY,
    // per-burst control notification
    output wire [ID_WIDTH-1:0]   out_CTRL_ID,
    output wire                  out_CTRL_INFO,    // high on the final burst of a request
    output wire                  out_CTRL_VALID,
    input  wire [NUM_PORTS-1:0]  in_CTRL_READY     // indexed by the ID of the burst being issued
);
//------------------------Parameter----------------------
    // PACK_WIDTH     : width of the packed {id, len, addr} word in the register slice.
    // ADDR_ALIGN     : number of byte-offset bits inside one data beat.
    // BOUNDARY_BEATS : (12 - ADDR_ALIGN) ones, i.e. 2**(12-ADDR_ALIGN) - 1; used as
    //                  the section length of a section covering a whole 4 KB page.
    // NUM_BEAT_WIDTH : number of low bits of a section length that fit in one burst.
    localparam
        PACK_WIDTH      = ID_WIDTH+ADDR_WIDTH+LEN_WIDTH,
        DATA_BYTES      = DATA_WIDTH / 8,
        ADDR_ALIGN      = log2(DATA_BYTES),
        BOUNDARY_BEATS  = {12-ADDR_ALIGN{1'b1}},
        NUM_BEAT_WIDTH  = log2(MAX_BURST_LEN);
//------------------------Task and function--------------
    // Returns the smallest n for which 2**n >= x (0 when x <= 1).
    function integer log2;
        input integer x;
        integer n, m;
        begin
            n = 0;
            m = 1;
            while (m < x) begin
                n = n + 1;
                m = m * 2;
            end
            log2 = n;
        end
    endfunction
//------------------------Local signal-------------------
    // packed request going into / coming out of the register slice
    wire [PACK_WIDTH-1:0]       req_pack_in;
    wire [PACK_WIDTH-1:0]       req_pack_out;
    wire [ID_WIDTH-1:0]         req_id_tmp;
    wire [ADDR_WIDTH-1:0]       req_addr_tmp;
    wire [LEN_WIDTH-1:0]        req_len_tmp;

    // register-slice handshake
    wire                        req_full_n;     // slice s_ready
    wire                        req_empty_n;    // slice m_valid
    wire                        write_req;
    wire                        read_req;
    wire                        next_req;       // a request is popped this cycle

    // state of the request currently being split
    reg  [ADDR_WIDTH - 1:0]     start_addr;
    wire [ADDR_WIDTH - 1:0]     sect_addr;
    reg  [ADDR_WIDTH - 1:0]     sect_addr_buf;
    reg  [ID_WIDTH-1:0]         req_id;
    reg  [ID_WIDTH-1:0]         req_id_buf;
    reg                         req_handling;

    // section lengths; all share the width of BOUNDARY_BEATS
    reg  [11 - ADDR_ALIGN:0]    start_to_4k;
    reg  [11 - ADDR_ALIGN:0]    end_from_4k;
    wire [11 - ADDR_ALIGN:0]    sect_len;
    reg  [11 - ADDR_ALIGN:0]    sect_len_buf;
    reg  [11 - ADDR_ALIGN:0]    beat_len;
    
    // 4 KB page counter and number of sections still to issue
    reg  [ADDR_WIDTH-13:0]      sect_cnt;
    reg  [LEN_WIDTH-13:0]       sect_total;
    reg  [LEN_WIDTH-13:0]       sect_total_buf;
    wire [LEN_WIDTH-13:0]       sect_total_tmp;
    wire                        ready_for_sect;
    
    wire                        single_sect;
    reg                         first_sect;
    reg                         last_sect;
    wire                        last_sect_tmp;
    reg                         last_sect_buf;
    wire                        next_sect;      // a section is issued this cycle

    reg                         burst_valid;

    // control notification, driven inside the generate branches
    wire [ID_WIDTH-1:0]         ost_ctrl_id;
    wire                        ost_ctrl_info;
    wire                        ost_ctrl_valid;
    wire                        ost_ctrl_ready;

//------------------------Instantiation------------------
    // Register slice that holds incoming requests until the splitter pops them.
    s2mm_gmem_m_axi_reg_slice #(
        .DATA_WIDTH     (PACK_WIDTH)
    ) rs_req (
        .clk            (clk),
        .reset          (reset),
        .s_data         (req_pack_in),
        .s_valid        (write_req),
        .s_ready        (req_full_n),
        .m_data         (req_pack_out),
        .m_valid        (req_empty_n),
        .m_ready        (read_req));
    
//------------------------Body--------------------------- 

    // Input side: the same ready is reported on every bit of out_REQ_READY.
    assign out_REQ_READY = {NUM_PORTS{req_full_n}};
    assign req_pack_in   = {in_REQ_ID, in_REQ_LEN, in_REQ_ADDR};
    assign write_req     = in_REQ_VALID;

    // Unpack in the same order as req_pack_in was built.
    assign req_id_tmp    = req_pack_out[PACK_WIDTH-1  : ADDR_WIDTH+LEN_WIDTH];
    assign req_len_tmp   = req_pack_out[ADDR_WIDTH+LEN_WIDTH-1 : ADDR_WIDTH];
    assign req_addr_tmp  = req_pack_out[ADDR_WIDTH-1  : 0];

    // Pop the next request when idle, or in the very cycle the last section
    // of the current request is issued.
    assign read_req      = last_sect_tmp & next_sect | ~req_handling;
    assign next_req      = read_req & req_empty_n;

    // Capture the per-request quantities when a request is popped.
    always @(posedge clk)
    begin
        if (reset) begin
            req_id      <= 0;
            start_addr  <= 0;
            beat_len    <= 0;
            sect_total  <= 0;
            end_from_4k <= 0;
            start_to_4k <= 0;
        end
        else if (clk_en) begin
            if (next_req) begin
                req_id      <= req_id_tmp;
                // address with the intra-beat byte offset cleared
                start_addr  <= {req_addr_tmp[ADDR_WIDTH-1:ADDR_ALIGN], {ADDR_ALIGN{1'b0}}};
                // section length used only when the request is a single section
                beat_len    <= (req_len_tmp[11:0] + req_addr_tmp[ADDR_ALIGN-1:0]) >> ADDR_ALIGN;
                // number of 4 KB boundaries the request crosses
                sect_total  <= (req_len_tmp + req_addr_tmp[11:0]) >> 12;
                // section length of the final section of a multi-section request
                end_from_4k <= (req_addr_tmp[11:0] + req_len_tmp[11:0]) >> ADDR_ALIGN;
                // section length of the first section of a multi-section request
                start_to_4k <= BOUNDARY_BEATS - req_addr_tmp[11:ADDR_ALIGN];
            end
        end
    end

    // req_handling is high while a popped request still has sections to issue.
    // It drops when the last section is issued and no new request is waiting;
    // if one is waiting, next_req fires in that cycle and keeps it high.
    always @(posedge clk)
    begin
        if (reset)
            req_handling <= 1'b0;
        else if (clk_en) begin
            if (next_req)
                req_handling <= 1'b1;
            else if (~req_empty_n && last_sect_tmp & next_sect)
                req_handling <= 1'b0;
        end
    end

    // 4k boundary
    assign last_sect_tmp  = single_sect || last_sect;

    // Sections remaining: the captured total for the first section, the
    // decremented copy afterwards.
    assign sect_total_tmp = first_sect ? sect_total : sect_total_buf;
    
    assign single_sect  = (sect_total == 0);

    // next_sect is driven inside the generate branches below.
    //assign next_sect  = req_handling && ready_for_sect;

    // First section starts at the aligned request address; later sections
    // start at the beginning of the 4 KB page selected by sect_cnt.
    assign sect_addr  = (first_sect)? start_addr : {sect_cnt, {12{1'b0}}};
    
    // Section length: whole request, head of request, tail of request, or a
    // full page, in that order of the conditions below.
    assign sect_len   = single_sect              ? beat_len :
                        ( first_sect && ~last_sect)? start_to_4k :
                        (~first_sect &&  last_sect)? end_from_4k :
                                                     BOUNDARY_BEATS;

   // Section bookkeeping. next_req has priority over next_sect, so a request
   // popped in the cycle the previous one finishes re-initialises the state.
   always @(posedge clk)
    begin
        if (reset) begin
            first_sect <= 1'b0;
            last_sect <= 1'b0;
            sect_cnt <= 0;
        end
        else if (clk_en) begin
            if (next_req) begin
                first_sect <= 1'b1;
                last_sect <= 1'b0;
                sect_cnt <= req_addr_tmp[ADDR_WIDTH-1:12];
            end
            else if (next_sect) begin
                first_sect <= 1'b0;
                // exactly one section left after this one
                last_sect <= (sect_total_tmp == 1);
                sect_cnt <= sect_cnt + 1;
            end
        end
    end

    // Snapshot of the section being issued; these registers feed the burst
    // output stage in the generate branches.
    always @(posedge clk)
    begin
        if (reset) begin
            req_id_buf     <= 0;
            sect_addr_buf  <= 0;
            sect_len_buf   <= 0;
            last_sect_buf  <= 1'b0;
            sect_total_buf <= 0;
        end
        else if (clk_en) begin
            if (next_sect) begin
                req_id_buf     <= req_id;
                sect_addr_buf  <= sect_addr;
                sect_len_buf   <= sect_len;
                last_sect_buf  <= last_sect_tmp;
                sect_total_buf <= sect_total_tmp - 1;
            end
        end
    end

    assign out_CTRL_VALID     = ost_ctrl_valid;
    assign out_CTRL_INFO      = ost_ctrl_info;
    assign out_CTRL_ID        = ost_ctrl_id;

    generate
    // Branch 1: every section is emitted as exactly one burst.
    if (DATA_BYTES >= 4096/MAX_BURST_LEN) begin : must_one_burst
        wire                   read_sect;

        // The burst is presented straight from the section snapshot registers.
        assign out_BURST_ADDR  = sect_addr_buf;
        assign out_BURST_LEN   = sect_len_buf;
        assign out_BURST_VALID = burst_valid;
        assign out_BURST_ID    = req_id_buf;

        // One control notification per section, flagged on the last one.
        assign ost_ctrl_valid  = next_sect;
        assign ost_ctrl_info   = last_sect_tmp;
        assign ost_ctrl_id     = req_id;
        assign ost_ctrl_ready  = in_CTRL_READY[req_id];

        // A section is issued when a request is active, the burst output is
        // not stalled, and the control side is ready for this ID.
        assign next_sect       = read_sect && ost_ctrl_ready;
        assign read_sect       = req_handling && ready_for_sect;
        assign ready_for_sect  = ~(burst_valid && ~in_BURST_READY);

        // burst_valid: set when a section is issued, otherwise cleared once
        // the current burst has been accepted.
        always @(posedge clk)
        begin
            if (reset)
                burst_valid <= 1'b0;
            else if (clk_en) begin
                if (next_sect)
                    burst_valid <= 1'b1;
                else if (in_BURST_READY)
                    burst_valid <= 1'b0;
            end
        end
    
    end
    // Branch 2: a section may need several bursts ("loops").
    else begin : could_multi_bursts
        reg  [ID_WIDTH-1:0]                       burst_id;
        reg  [ADDR_WIDTH - 1:0]                   burst_addr;
        wire [ADDR_WIDTH - 1:0]                   burst_addr_next;
        reg  [7:0]                                burst_len;
        reg  [8:0]                                burst_len_plus1;
        wire [7:0]                                burst_len_next;

        reg                                       sect_handling;
        reg  [11 - NUM_BEAT_WIDTH - ADDR_ALIGN:0] loop_cnt;
        reg                                       first_loop;
        reg                                       last_loop;
        wire                                      next_loop;
        wire                                      read_loop;
        wire                                      ready_for_loop;

        // Burst output comes from dedicated burst registers in this branch.
        assign out_BURST_ADDR  = burst_addr;
        assign out_BURST_LEN   = burst_len;
        assign out_BURST_VALID = burst_valid;
        assign out_BURST_ID    = burst_id;

        // One control notification per burst; flagged only on the last burst
        // of the last section.
        assign ost_ctrl_valid  = next_loop;
        assign ost_ctrl_info   = last_loop && last_sect_buf;
        assign ost_ctrl_id     = req_id_buf;
        assign ost_ctrl_ready  = in_CTRL_READY[req_id_buf];
        
        // A burst is issued when a section is active, the burst output is not
        // stalled, and the control side is ready for this ID.
        assign ready_for_loop  = ~(burst_valid && ~in_BURST_READY);
        assign read_loop       = sect_handling && ready_for_loop;
        assign next_loop       = read_loop && ost_ctrl_ready;

        // The next section may be loaded when no section is active, or in the
        // cycle the last burst of the current section is issued.
        assign next_sect       = req_handling && ready_for_sect;
        assign ready_for_sect  = ~sect_handling || (last_loop && next_loop);

        // burst_valid: set when a burst is issued, otherwise cleared once the
        // current burst has been accepted.
        always @(posedge clk)
        begin
            if (reset)
                burst_valid <= 1'b0;
            else if (clk_en) begin
                if (next_loop)
                    burst_valid <= 1'b1;
                else if (in_BURST_READY)
                    burst_valid <= 1'b0;
            end
        end

        // sect_handling rises one cycle after req_handling and falls when the
        // last burst is issued while no request is active any more.
        always @(posedge clk)
        begin
            if (reset)
                sect_handling <= 1'b0;
            else if (clk_en) begin
                if (req_handling && ~sect_handling)
                    sect_handling <= 1'b1;
                else if (~req_handling && last_loop && next_loop)
                    sect_handling <= 1'b0;
            end
        end

        // Burst counter for the current section. It is loaded from the upper
        // bits of sect_len (not sect_len_buf, which is written on this same
        // edge) and decremented per burst. next_sect has priority so that a
        // section loaded together with the previous section's last burst
        // starts from fresh values.
        always @(posedge clk)
        begin
            if (reset) begin
                first_loop <= 1'b0;
                last_loop <= 1'b0;
                loop_cnt <= 0;
            end
            else if (clk_en) begin
                if (next_sect) begin
                    first_loop <= 1'b1;
                    last_loop <= (sect_len[11 - ADDR_ALIGN : NUM_BEAT_WIDTH] == 0);
                    loop_cnt <= sect_len[11 - ADDR_ALIGN : NUM_BEAT_WIDTH];
                end
                else if (next_loop) begin
                    first_loop <= 1'b0;
                    last_loop <= (loop_cnt == 1);
                    loop_cnt <= loop_cnt - 1;
                end
            end
        end

        // First burst starts at the section address; each later burst starts
        // (previous length + 1) beats after the previous burst address.
        assign burst_addr_next = first_loop ? sect_addr_buf : (burst_addr + (burst_len_plus1 << ADDR_ALIGN));
        // The last burst carries the low NUM_BEAT_WIDTH bits of the section
        // length; every other burst uses the all-ones (maximum) length.
        assign burst_len_next  = (NUM_BEAT_WIDTH == 0) ? 0 :
                                 last_loop             ? sect_len_buf[NUM_BEAT_WIDTH - 1:0] :
                                                         { NUM_BEAT_WIDTH{1'b1} };

        // Burst output registers, updated whenever a burst is issued.
        // burst_len_plus1 is kept alongside burst_len for the address advance.
        always @(posedge clk)
        begin
            if (reset) begin
                burst_id   <= 0;
                burst_addr <= 0;
                burst_len  <= 0;
                burst_len_plus1 <= 0;
            end
            else if (clk_en) begin
                if (next_loop) begin
                    burst_id   <= req_id_buf;
                    burst_addr <= burst_addr_next;
                    burst_len  <= burst_len_next;
                    burst_len_plus1 <= burst_len_next + 1;
                end
            end
        end

    end
    endgenerate

endmodule


module s2mm_gmem_m_axi_throttle
#(parameter
    CONSERVATIVE        = 0,
    ID_WIDTH            = 1,
    ADDR_WIDTH          = 32,
    DATA_WIDTH          = 32,
    NUM_OUTSTANDING     = 16,
    ID0_NUM_OUTSTANDING = 2,
    NUM_PORTS           = 1
)(
    input  wire                        clk,
    input  wire                        reset,
    input  wire                        clk_en,
    // internal ports
    input  wire [ID_WIDTH-1:0]         in_BURST_AWID,
    input  wire [ADDR_WIDTH-1:0]       in_BURST_AWADDR,
    input  wire [7:0]                  in_BURST_AWLEN,
    input  wire                        in_BURST_AWVALID,
    output wire                        out_BURST_AWREADY,
    input  wire [ID_WIDTH-1:0]         in_BURST_WID,    // only for conservative mode
    input  wire [7:0]                  in_BURST_WLEN,   // only for conservative mode
    input  wire                        in_BURST_WVALID, // only for conservative mode
    output wire                        out_BURST_WREADY,// only for conservative mode
    output wire [ID_WIDTH-1:0]         out_AXI_WID,
    input  wire [DATA_WIDTH-1:0]       in_AXI_WDATA,
    input  wire [DATA_WIDTH/8-1:0]     in_AXI_WSTRB,
    input  wire                        in_AXI_WVALID,
    output wire                        out_AXI_WREADY,
    // axi bus ports
    output wire [ID_WIDTH-1:0]         out_BUS_AWID,
    output wire [ADDR_WIDTH-1:0]       out_BUS_AWADDR,
    output wire [7:0]                  out_BUS_AWLEN,
    output wire                        out_BUS_AWVALID,
    input  wire                        in_BUS_AWREADY,
    output wire [ID_WIDTH-1:0]         out_BUS_WID,
    output wire [DATA_WIDTH-1:0]       out_BUS_WDATA,
    output wire [DATA_WIDTH/8-1:0]     out_BUS_WSTRB,
    output wire                        out_BUS_WLAST,
    output wire                        out_BUS_WVALID,
    input  wire                        in_BUS_WREADY
);

//------------------------Task and function--------------
    function integer log2;
        input integer x;
        integer n, m;
    begin
        n = 0;
        m = 1;
        while (m < x) begin
            n = n + 1;
            m = m * 2;
        end
        log2 = n;
    end
    endfunction

    function [NUM_PORTS-1:0] bit_set;
        input [ID_WIDTH-1:0] idx;
        input                valid;
    begin
        bit_set = {NUM_PORTS{1'b0}};
        bit_set[idx] = valid;
    end
    endfunction

    function integer num_outstanding_val;
        input integer idx;
    begin
        case (idx)
            0 : num_outstanding_val = ID0_NUM_OUTSTANDING;
            default : num_outstanding_val = 0;
        endcase
    end
    endfunction
//------------------------Local signal-------------------
    wire                        local_AXI_WREADY;
    reg  [ID_WIDTH-1 : 0]       local_BUS_WID;
    reg  [DATA_WIDTH-1 : 0]     local_BUS_WDATA;
    reg  [DATA_WIDTH/8-1 : 0]   local_BUS_WSTRB;
    reg                         local_BUS_WVALID;
    reg                         local_BUS_WLAST;

    wire [ID_WIDTH-1:0]         local_BURST_AWID;
    wire [ADDR_WIDTH-1:0]       local_BURST_AWADDR;
    wire [7:0]                  local_BURST_AWLEN;
    wire                        local_BURST_AWVALID;
    wire                        local_BURST_AWREADY;

    wire [ID_WIDTH-1 : 0]       local_BURST_WID;
    wire [7:0]                  local_BURST_WLEN;
    wire                        local_BURST_WVALID;
    wire                        local_BURST_WREADY;
    
    reg                         burst_handling;
    wire                        ready_for_burst;
    wire                        ready_for_beat;
    wire                        next_burst;
    wire                        next_beat;
    reg  [7:0]                  num_beat_cnt;
//------------------------Body---------------------------

    // AW Channel
    // Register slice between the internally formed AW request and the
    // out_BUS_AW* port. The {ID, LEN, ADDR} packing order is the same on the
    // slave and master side, so the fields pass through unchanged.
    s2mm_gmem_m_axi_reg_slice #(
        .DATA_WIDTH            (ID_WIDTH + ADDR_WIDTH + 8)
    ) rs_req (
        .clk                   (clk),
        .reset                 (reset),
        .s_data                ({local_BURST_AWID, local_BURST_AWLEN, local_BURST_AWADDR}),
        .s_valid               (local_BURST_AWVALID),
        .s_ready               (local_BURST_AWREADY),
        .m_data                ({out_BUS_AWID, out_BUS_AWLEN, out_BUS_AWADDR}),
        .m_valid               (out_BUS_AWVALID),
        .m_ready               (in_BUS_AWREADY));

    // burst_handling: set while a burst descriptor is valid, has not been
    // retired this cycle, and the AW side is ready for it; cleared in every
    // other enabled cycle.
    always @(posedge clk) begin
        if (reset) begin
            burst_handling <= 1'b0;
        end
        else if (clk_en) begin
            burst_handling <= (local_BURST_WVALID && !local_BURST_WREADY && ready_for_burst)
                              ? 1'b1 : 1'b0;
        end
    end

    // aggressive mode
    // A single register slice carries {ID, LEN, ADDR} from the in_BURST_AW*
    // port; the same descriptor provides both the AW request fields and the
    // W burst ID/length.
    generate if (CONSERVATIVE == 0) begin

        s2mm_gmem_m_axi_reg_slice #(
            .DATA_WIDTH            (ID_WIDTH + ADDR_WIDTH + 8)
        ) rs_burst (
            .clk                   (clk),
            .reset                 (reset),
            .s_data                ({in_BURST_AWID, in_BURST_AWLEN, in_BURST_AWADDR}),
            .s_valid               (in_BURST_AWVALID),
            .s_ready               (out_BURST_AWREADY),
            .m_data                ({local_BURST_WID, local_BURST_WLEN, local_BURST_AWADDR}),
            .m_valid               (local_BURST_WVALID),
            .m_ready               (local_BURST_WREADY));

        // The in_BURST_W* request port is never accepted in this mode.
        assign out_BURST_WREADY    = 1'b0;
        assign local_BURST_AWID    = local_BURST_WID;
        assign local_BURST_AWLEN   = local_BURST_WLEN;
        // Request the AW only while burst_handling is low, so each
        // descriptor is offered to rs_req until it is first accepted.
        assign local_BURST_AWVALID = local_BURST_WVALID && ~burst_handling;
    end
    // conservative mode
    // Addresses are queued in one FIFO per port as they arrive on
    // in_BURST_AW*; the {ID, LEN} descriptor arrives separately on
    // in_BURST_W*. An AW is issued only once both are available.
    else begin
        genvar                     idx;
        wire [NUM_PORTS-1:0]       burst_ready;
        wire [NUM_PORTS-1:0]       burst_valid;
        wire [NUM_PORTS-1:0]       burst_write;
        wire [NUM_PORTS-1:0]       burst_read;
        wire [ADDR_WIDTH-1:0]      burst_addr [0:NUM_PORTS-1];
        
        // One address FIFO per port, sized by num_outstanding_val(idx).
        for (idx = 0; idx < NUM_PORTS; idx = idx + 1) begin : fifo_burst_gen
            s2mm_gmem_m_axi_fifo #(
                .DATA_WIDTH        (ADDR_WIDTH),
                .ADDR_WIDTH        (log2(num_outstanding_val(idx))),
                .DEPTH             (num_outstanding_val(idx))
            ) fifo_req (
                .clk               (clk),
                .reset             (reset),
                .clk_en            (clk_en),
                .if_full_n         (burst_ready[idx]),
                .if_write          (burst_write[idx]),
                .if_din            (in_BURST_AWADDR),
                .if_empty_n        (burst_valid[idx]),
                .if_read           (burst_read[idx]),
                .if_dout           (burst_addr[idx]),
                .if_num_data_valid ());
        end

        s2mm_gmem_m_axi_reg_slice #(
            .DATA_WIDTH            (ID_WIDTH + 8)
        ) rs_burst (
            .clk                   (clk),
            .reset                 (reset),
            .s_data                ({in_BURST_WID, in_BURST_WLEN}),
            .s_valid               (in_BURST_WVALID),
            .s_ready               (out_BURST_WREADY),
            .m_data                ({local_BURST_WID, local_BURST_WLEN}),
            .m_valid               (local_BURST_WVALID),
            .m_ready               (local_BURST_WREADY));

        // NOTE(review): ready is the OR of all FIFO not-full flags, so with
        // NUM_PORTS > 1 it can be high while the FIFO selected by
        // in_BURST_AWID is full -- confirm this is intended.
        assign out_BURST_AWREADY   = |burst_ready;
        assign local_BURST_AWID    = local_BURST_WID;
        assign local_BURST_AWLEN   = local_BURST_WLEN;
        // The address comes from the FIFO of the port named by the descriptor.
        assign local_BURST_AWADDR  = burst_addr[local_BURST_WID];
        assign local_BURST_AWVALID = burst_valid[local_BURST_WID] && local_BURST_WVALID && ~burst_handling;

        // Push the incoming address into the FIFO selected by in_BURST_AWID.
        assign burst_write         = bit_set(in_BURST_AWID, in_BURST_AWVALID);
        // Pop the used address when the burst descriptor is retired.
        assign burst_read          = bit_set(local_BURST_WID, local_BURST_WVALID & local_BURST_WREADY);
    end
    endgenerate

    // W Channel
    // Port drivers: the out_BUS_W* outputs come from the local_BUS_* registers.
    assign out_AXI_WID             = local_BURST_WID;
    assign out_AXI_WREADY          = local_AXI_WREADY;
    assign out_BUS_WID             = local_BUS_WID;
    assign out_BUS_WDATA           = local_BUS_WDATA;
    assign out_BUS_WSTRB           = local_BUS_WSTRB;
    assign out_BUS_WLAST           = local_BUS_WLAST;
    assign out_BUS_WVALID          = local_BUS_WVALID;

    // A data beat is accepted only when a burst descriptor is valid, the
    // AW side is ready for it, and the output register can take a new beat.
    assign local_AXI_WREADY        = local_BURST_WVALID && ready_for_burst && ready_for_beat;
    // The descriptor is retired on the beat that completes the burst.
    assign local_BURST_WREADY      = next_burst && ready_for_burst;

    // Ready for the burst when it is already being handled, or when rs_req
    // can accept its AW request this cycle.
    assign ready_for_burst         = burst_handling || local_BURST_AWREADY;
    // The output register is free, or is being drained this cycle.
    assign ready_for_beat          = (~local_BUS_WVALID || in_BUS_WREADY);
    // num_beat_cnt starts at 0, so a burst spans local_BURST_WLEN + 1 beats.
    assign next_burst              = (num_beat_cnt == local_BURST_WLEN) && next_beat;
    assign next_beat               = in_AXI_WVALID && local_AXI_WREADY;

    // W-channel payload register: captures ID, data and strobe of every
    // accepted beat (next_beat) and holds them otherwise.
    // Reset takes priority over the capture path, matching the other
    // W-channel registers in this module; previously a beat accepted while
    // reset was asserted would override the reset values.
    always @(posedge clk)
    begin
        if (reset) begin
            local_BUS_WDATA <= 0;
            local_BUS_WSTRB <= 0;
            local_BUS_WID   <= 0;
        end
        else if (clk_en) begin
            if (next_beat) begin
                local_BUS_WID   <= local_BURST_WID;
                local_BUS_WDATA <= in_AXI_WDATA;
                local_BUS_WSTRB <= in_AXI_WSTRB;
            end
        end
    end

    // W valid flag: raised when a beat is loaded into the output register,
    // dropped when the register is free or drained and no new beat arrives.
    always @(posedge clk) begin
        if (reset) begin
            local_BUS_WVALID <= 1'b0;
        end
        else if (clk_en) begin
            if (next_beat || ready_for_beat)
                local_BUS_WVALID <= next_beat;
        end
    end

    // W last flag: raised together with the final beat of a burst, dropped
    // when the output register is free or drained and no final beat arrives.
    always @(posedge clk) begin
        if (reset) begin
            local_BUS_WLAST <= 1'b0;
        end
        else if (clk_en) begin
            if (next_burst || ready_for_beat)
                local_BUS_WLAST <= next_burst;
        end
    end

    // Beat counter: restarts at 0 when a burst completes, otherwise advances
    // by one on each accepted beat.
    always @(posedge clk) begin
        if (reset) begin
            num_beat_cnt <= 0;
        end
        else if (clk_en) begin
            if (next_burst || next_beat)
                num_beat_cnt <= next_burst ? 8'd0 : (num_beat_cnt + 8'd1);
        end
    end

endmodule


// Two-entry valid/ready register slice.
//   data_p1 is the output stage (drives m_data); data_p2 is a backup stage
//   that captures a word accepted while the output stage is stalled.
//   States: ZERO = empty, ONE = data_p1 holds a word, TWO = data_p1 and
//   data_p2 both hold a word. Bit 0 of the state encoding is m_valid.
module s2mm_gmem_m_axi_reg_slice
#(parameter
    DATA_WIDTH = 8
) (
    // system signals
    input  wire                  clk,
    input  wire                  reset,
    // slave side
    input  wire [DATA_WIDTH-1:0] s_data,
    input  wire                  s_valid,
    output wire                  s_ready,
    // master side
    output wire [DATA_WIDTH-1:0] m_data,
    output wire                  m_valid,
    input  wire                  m_ready
);
//------------------------Parameter----------------------
    // state encoding
    localparam [1:0]
        ZERO = 2'b10,
        ONE  = 2'b11,
        TWO  = 2'b01;
//------------------------Local signal-------------------
    reg  [DATA_WIDTH-1:0] data_p1;
    reg  [DATA_WIDTH-1:0] data_p2;
    reg          s_ready_t;
    reg  [1:0]   state;
    reg  [1:0]   next;

    // decoded state
    wire         in_zero = (state == ZERO);
    wire         in_one  = (state == ONE);
    wire         in_two  = (state == TWO);

    // the output stage takes a new word (from the input or from data_p2)
    wire         load_p1 = (in_zero && s_valid) ||
                           (in_one && s_valid && m_ready) ||
                           (in_two && m_ready);
    // the backup stage shadows every word accepted on the slave side
    wire         load_p2 = s_valid & s_ready;
//------------------------Body---------------------------
    assign s_ready = s_ready_t;
    assign m_data  = data_p1;
    assign m_valid = state[0];

    // output stage: refill from the backup stage when it holds a word
    always @(posedge clk) begin
        if (load_p1)
            data_p1 <= in_two ? data_p2 : s_data;
    end

    // backup stage
    always @(posedge clk) begin
        if (load_p2)
            data_p2 <= s_data;
    end

    // slave-side ready: low after reset and while both stages are full
    always @(posedge clk) begin
        if (reset)
            s_ready_t <= 1'b0;
        else begin
            case (state)
                ZERO:
                    s_ready_t <= 1'b1;
                ONE:
                    if (next == TWO) s_ready_t <= 1'b0;
                TWO:
                    if (next == ONE) s_ready_t <= 1'b1;
                default: ; // unused encoding: hold
            endcase
        end
    end

    // state register
    always @(posedge clk) begin
        if (reset)
            state <= ZERO;
        else
            state <= next;
    end

    // next-state logic
    always @(*) begin
        case (state)
            ZERO:
                next = (s_valid & s_ready) ? ONE : ZERO;
            ONE:
                case ({s_valid, m_ready})
                    2'b01:   next = ZERO; // drained, nothing new
                    2'b10:   next = TWO;  // new word while stalled
                    default: next = ONE;
                endcase
            TWO:
                next = m_ready ? ONE : TWO;
            default:
                next = ZERO;
        endcase
    end
endmodule


// Synchronous FIFO with a registered output word.
//   DEPTH == 0 : combinational pass-through (no storage).
//   DEPTH == 1 : a single holding register.
//   DEPTH  > 1 : DEPTH-1 storage entries plus the output register; storage is
//                a shift register (MEM_STYLE == "shiftreg") or an addressed
//                memory (any other MEM_STYLE).
// Ports:
//   if_full_n / if_write / if_din   : write side (a write is taken only when
//                                     if_full_n is high)
//   if_empty_n / if_read / if_dout  : read side (if_dout is valid while
//                                     if_empty_n is high)
//   if_num_data_valid               : number of words held, reported as 0
//                                     while the output word is not valid
module s2mm_gmem_m_axi_fifo
#(parameter
    MEM_STYLE   = "shiftreg",
    DATA_WIDTH = 32,
    ADDR_WIDTH = 5,
    DEPTH      = 32
) (
    // system signal
    input  wire                  clk,
    input  wire                  reset,
    input  wire                  clk_en,
    // write
    output wire                  if_full_n,
    input  wire                  if_write,
    input  wire [DATA_WIDTH-1:0] if_din,
    // read
    output wire                  if_empty_n,
    input  wire                  if_read,
    output wire [DATA_WIDTH-1:0] if_dout,
    output wire [ADDR_WIDTH:0]   if_num_data_valid);

//------------------------Local signal-------------------

    wire                  push;             // a word enters the FIFO this cycle
    wire                  pop;              // a word moves from storage to the output register
    reg                   full_n = 1'b1;    // write side can accept a word
    reg                   empty_n = 1'b0;   // storage (behind the output register) holds a word
    reg                   dout_vld = 1'b0;  // output register holds a valid word
//------------------------Body---------------------------

    generate if (DEPTH == 0) begin
        // No storage: the write and read handshakes are wired straight through.
        assign if_num_data_valid = if_write;
        assign if_full_n  = if_read;
        assign if_empty_n = if_write;
        assign if_dout = if_din;

    end else if (DEPTH == 1) begin
        // Single holding register; it must be emptied before the next write.
        reg [DATA_WIDTH-1:0] dout_reg;

        assign if_num_data_valid = dout_vld;
        assign if_full_n  = !dout_vld;
        assign if_empty_n = dout_vld;
        assign if_dout = dout_reg;
        assign push = !dout_vld & if_write;
        assign pop  = dout_vld & if_read;

        always @(posedge clk)
        begin
            if (reset == 1'b1)
                dout_reg <= 0;
            else if (clk_en) begin
                if (push)
                    dout_reg <= if_din;
            end
        end

        // dout_vld
        always @(posedge clk) begin
            if (reset == 1'b1)
                dout_vld <= 1'b0;
            else if (clk_en)
                if (push)
                    dout_vld <= 1'b1;
                else if (pop)
                    dout_vld <= 1'b0;
        end
    
    end else if (DEPTH > 1) begin
        reg  [ADDR_WIDTH-1:0] raddr = {ADDR_WIDTH{1'b0}};
        reg  [ADDR_WIDTH-1:0] waddr = {ADDR_WIDTH{1'b0}};
        // words held in storage, excluding the output register
        reg  [ADDR_WIDTH:0]   mOutPtr = {ADDR_WIDTH+1{1'b0}};
        // words held in total, including the output register
        reg  [ADDR_WIDTH:0]   num_data_cnt = {ADDR_WIDTH+1{1'b0}};
        // the output word is consumed by the reader this cycle
        wire                  pop_dout;

        assign if_num_data_valid = dout_vld ? num_data_cnt : {ADDR_WIDTH+1{1'b0}};
        assign if_full_n  = full_n;
        assign if_empty_n = dout_vld;
        assign push = full_n & if_write;
        // Refill the output register when it is read or when it is empty.
        assign pop  = empty_n & (if_read | ~dout_vld);
        assign pop_dout = dout_vld & if_read;

        if (MEM_STYLE == "shiftreg") begin
            s2mm_gmem_m_axi_srl
            #(  .DATA_WIDTH     (DATA_WIDTH),
                .ADDR_WIDTH     (ADDR_WIDTH),
                .DEPTH          (DEPTH))
            U_fifo_srl(
                .clk            (clk),
                .reset          (reset),
                .clk_en         (clk_en),
                .we             (push),
                .din            (if_din),
                .raddr          (raddr),
                .re             (pop),
                .dout           (if_dout)
            );
            // raddr: index of the oldest word in the shift register. It
            // moves up when a word is pushed behind existing data and down
            // when a word is popped.
            always @(posedge clk) begin
                if (reset == 1'b1)
                    // Reset literal sized to match raddr's declared width
                    // (it was ADDR_WIDTH+1 bits and silently truncated).
                    raddr <= {ADDR_WIDTH{1'b0}};
                else if (clk_en) begin
                    if (push & ~pop & empty_n)
                        raddr <= raddr + 1'b1;
                    else if (~push & pop && raddr != 0)
                        raddr <= raddr - 1'b1;
                end
            end
        end else begin
            wire [ADDR_WIDTH-1:0] wnext;
            wire [ADDR_WIDTH-1:0] rnext;

            s2mm_gmem_m_axi_mem
            #(  .MEM_STYLE      (MEM_STYLE),
                .DATA_WIDTH     (DATA_WIDTH),
                .ADDR_WIDTH     (ADDR_WIDTH),
                .DEPTH          (DEPTH))
            U_fifo_mem(
                .clk            (clk),
                .reset          (reset),
                .clk_en         (clk_en),
                .we             (push),
                .waddr          (waddr),
                .din            (if_din),
                .raddr          (raddr),
                .re             (pop),
                .dout           (if_dout)
            );

            // Circular pointers over the DEPTH-1 memory entries (0..DEPTH-2).
            assign wnext =  !push                ? waddr :
                            (waddr == DEPTH - 2) ? 1'b0  :
                            waddr + 1'b1;
            assign rnext =  !pop                 ? raddr :
                            (raddr == DEPTH - 2) ? 1'b0  :
                            raddr + 1'b1;

            // waddr
            always @(posedge clk) begin
                if (reset == 1'b1)
                    waddr <= {ADDR_WIDTH{1'b0}};
                else if (clk_en)
                    waddr <= wnext;
            end

            // raddr
            always @(posedge clk) begin
                if (reset == 1'b1)
                    raddr <= {ADDR_WIDTH{1'b0}};
                else if (clk_en)
                    raddr <= rnext;
            end
        end

        // mOutPtr
        always @(posedge clk) begin
            if (reset == 1'b1)
                mOutPtr <= {ADDR_WIDTH+1{1'b0}};
            else if (clk_en)
                if (push & ~pop)
                    mOutPtr <= mOutPtr + 1'b1;
                else if (~push & pop)
                    mOutPtr <= mOutPtr - 1'b1;
        end

        // num_data_cnt
        always @(posedge clk) begin
            if (reset == 1'b1)
                num_data_cnt <= {ADDR_WIDTH+1{1'b0}};
            else if (clk_en)
                if ( push & ~pop_dout)
                    num_data_cnt <= num_data_cnt + 1'b1;
                else if (~push & pop_dout)
                    num_data_cnt <= num_data_cnt - 1'b1;
        end

        // full_n: drops when the word being pushed brings the total to DEPTH;
        // rises again once a word is consumed without a simultaneous push.
        always @(posedge clk) begin
            if (reset == 1'b1)
                full_n <= 1'b1;
            else if (clk_en)
                if ((push & ~pop_dout) && (num_data_cnt == DEPTH - 1))
                    full_n <= 1'b0;
                else if (~push & pop_dout)
                    full_n <= 1'b1;
        end

        // empty_n: drops when the last stored word moves to the output register.
        always @(posedge clk)
        begin
            if (reset == 1'b1)
                empty_n <= 1'b0;
            else if (clk_en) begin
                if (push & ~pop)
                    empty_n <= 1'b1;
                else if ((~push & pop) && (mOutPtr == 1))
                    empty_n <= 1'b0;
            end
        end

        // dout_vld: set by a refill, cleared by a read with nothing to refill from.
        always @(posedge clk) begin
            if (reset == 1'b1)
                dout_vld <= 1'b0;
            else if (clk_en)
                if (pop)
                    dout_vld <= 1'b1;
                else if (pop_dout)
                    dout_vld <= 1'b0;
        end
    end
    endgenerate

endmodule

// Shift-register storage with an addressable, registered read tap.
//   A write shifts every stored word one position deeper and places `din`
//   at position 0, so position k holds the word written k writes ago.
//   A read registers the word at position `raddr` into `dout`.
module s2mm_gmem_m_axi_srl
#(parameter
        DATA_WIDTH  = 32,
        ADDR_WIDTH  = 6,
        DEPTH       = 63
    )(
        input  wire                  clk,
        input  wire                  reset,
        input  wire                  clk_en,
        input  wire                  we,
        input  wire [DATA_WIDTH-1:0] din,
        input  wire [ADDR_WIDTH-1:0] raddr,
        input  wire                  re,
        output reg  [DATA_WIDTH-1:0] dout
    );

    // DEPTH-1 stages, indexed 0 (newest) to DEPTH-2 (oldest)
    reg  [DATA_WIDTH-1:0] mem[0:DEPTH-2];

    wire    shift_en = clk_en & we;
    wire    read_en  = clk_en & re;
    integer stage;

    // shift chain (no reset on the storage itself)
    always @(posedge clk) begin
        if (shift_en) begin
            mem[0] <= din;
            for (stage = DEPTH - 2; stage > 0; stage = stage - 1)
                mem[stage] <= mem[stage-1];
        end
    end

    // registered read tap
    always @(posedge clk) begin
        if (reset)
            dout <= 0;
        else if (read_en)
            dout <= mem[raddr];
    end

endmodule

// Simple dual-port memory: one synchronous write port and one synchronous,
// registered read port. MEM_STYLE is forwarded as the `ram_style` attribute
// of the storage array. The array holds DEPTH-1 words (indices 0..DEPTH-2).
module s2mm_gmem_m_axi_mem
#(parameter
    MEM_STYLE   = "auto",
    DATA_WIDTH  = 32,
    ADDR_WIDTH  = 6,
    DEPTH       = 63
)(
    input  wire                  clk,
    input  wire                  reset,
    input  wire                  clk_en,
    input  wire                  we,
    input  wire [ADDR_WIDTH-1:0] waddr,
    input  wire [DATA_WIDTH-1:0] din,
    input  wire [ADDR_WIDTH-1:0] raddr,
    input  wire                  re,
    output reg  [DATA_WIDTH-1:0] dout);

    (* ram_style = MEM_STYLE *)
    reg  [DATA_WIDTH-1:0] mem[0:DEPTH-2];

    wire wr_en = clk_en & we;
    wire rd_en = clk_en & re;

    // write port (storage itself is not reset)
    always @(posedge clk) begin : ram_write
        if (wr_en) begin
            mem[waddr] <= din;
        end
    end

    // read port: only the output register is cleared by reset
    always @(posedge clk) begin : ram_read
        if (reset) begin
            dout <= 0;
        end
        else if (rd_en) begin
            dout <= mem[raddr];
        end
    end
endmodule
