#if !ASM_MODE
#include <adf.h>
#include <adf/adf_api/AIERuntimeControl.h>
#include "super.hh"
#include "graph.hpp"
#endif // !ASM_MODE
#ifdef __AIESIM__
#include "dma.hpp"
#endif // __AIESIM__

// NOTE: Which of the standard headers below are actually needed? Prune the unused ones.
#include <string>
#include <sstream>
#include <fstream>
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <math.h>
#include <fenv.h>
#include <iostream>
#include <vector>

#include "common.hpp"
#include "gap.hpp"

using namespace std;

#if !ASM_MODE
// File-scope graph object used by run_globalavgpool_a8(): it is passed to
// run_cert_sim() or, in the non-cert path, driven through init() /
// run_dma_layer_config() / end(). It is only defined when ASM_MODE is off.
ComputeGraph g_compute_graph;
#endif // !ASM_MODE

/**
 * @brief Host-side test driver for the int8 global-average-pool layer.
 *
 * Steps, in order:
 *   1. Allocate the IFM / WGT / OFM buffers (adf::GMIO::malloc when ASM_MODE
 *      is off, plain malloc when it is on; the device OFM buffer only exists
 *      when ASM_MODE is off).
 *   2. Seed rand() with a fixed value and fill the IFM through
 *      init_random_globalavgpool_a8(); optionally overwrite IFM and/or WGT
 *      with the contents of the ../intermediate_bins/ files.
 *   3. Compute the reference output with cpu_globalavgpool_a8().
 *   4. ASM_MODE: dump ifm.bin / wgt.bin / ofm.bin and the external-buffer
 *      JSON. __AIESIM__ without ASM_MODE: run the graph, compare the device
 *      output against the reference with cmp_tensor() and print
 *      DI_PASS / DI_FAIL.
 *   5. Release every buffer.
 *
 * @param Ci,Yi,Xi  Input tensor dimensions.
 * @param Co,Yo,Xo  Output tensor dimensions.
 * @param shift     Forwarded unchanged to cpu_globalavgpool_a8().
 * @param scale     Forwarded unchanged to cpu_globalavgpool_a8().
 * @param read_ifm  Non-zero: load the IFM from ../intermediate_bins/ifm1.bin
 *                  (replacing the random fill).
 * @param read_wgt  Non-zero: load the weights from
 *                  ../intermediate_bins/wgt.bin; otherwise they stay zero.
 * @return Always 0 when the function returns (see the note on the trailing
 *         assert).
 */
int run_globalavgpool_a8(
    int Ci, int Yi, int Xi,  // Input dimensions
    int Co, int Yo, int Xo,  // Output dimensions
    int shift, int scale,    // Computation parameters
    int read_ifm, int read_wgt  // Random or read from file
)
{
    // Shape of the weight tensor. The same three values drive both the byte
    // size and the tensor object so the declared shape can never exceed the
    // allocation backing it.
    int const wgt_c = 64;
    int const wgt_y = 1;
    int const wgt_x = 1;

    int ifm_size = ActTensor<int8_t>::size(Ci, Yi, Xi);
    // NOTE: Some cases of gap might use weights; do we need this?
    int wgt_size = ActTensor<int8_t>::size(wgt_c, wgt_y, wgt_x);
    int ofm_size = ActTensor<int8_t>::size(Co, Yo, Xo);

#if !ASM_MODE
    ActTensor<int8_t> aie_ifm(Ci, Yi, Xi, adf::GMIO::malloc(ifm_size));
    ActTensor<int8_t> aie_wgt(wgt_c, wgt_y, wgt_x, adf::GMIO::malloc(wgt_size));
    ActTensor<int8_t> aie_ofm(Co, Yo, Xo, adf::GMIO::malloc(ofm_size));
#else
    ActTensor<int8_t> aie_ifm(Ci, Yi, Xi, malloc(ifm_size));
    ActTensor<int8_t> aie_wgt(wgt_c, wgt_y, wgt_x, malloc(wgt_size));
#endif // !ASM_MODE

    printf("DDR IFM SIZE = %d \n", ifm_size);
    printf("DDR OFM SIZE = %d \n", ofm_size);

    ActTensor<int8_t> cpu_ofm(Co, Yo, Xo, malloc(ofm_size));

    // The weight buffer is only written when read_wgt is set, yet it is always
    // dumped to wgt.bin (ASM_MODE) or handed to the simulator. Zero it first so
    // those consumers never see indeterminate heap contents.
    char* const wgt_bytes = reinterpret_cast<char*>(aie_wgt.data);
    if (wgt_bytes != nullptr) {
        for (int i = 0; i < wgt_size; ++i) {
            wgt_bytes[i] = 0;
        }
    }

    //NOTE: Used in old gap; do we need this?
    // Fixed seed for rand().
    srand(0xABCD);

    init_random_globalavgpool_a8(aie_ifm, -127, 128);

    std::string const ifm_bin_path = "../intermediate_bins/ifm1.bin";
    std::string const wgt_bin_path = "../intermediate_bins/wgt.bin";
    if (read_ifm) {
        // Replaces the random IFM generated above.
        read_bin_file(ifm_bin_path, reinterpret_cast<char*>(aie_ifm.data), ifm_size);
    }
    if (read_wgt) {
        read_bin_file(wgt_bin_path, reinterpret_cast<char*>(aie_wgt.data), wgt_size);
    }

    // Reference result computed on the host.
    cpu_globalavgpool_a8(aie_ifm, cpu_ofm, shift, scale);
#if ASM_MODE
    // Dump the inputs and the reference output, plus the buffer-size JSON.
    write_bin_file("ifm.bin", reinterpret_cast<char*>(aie_ifm.data), ifm_size);
    write_bin_file("wgt.bin", reinterpret_cast<char*>(aie_wgt.data), wgt_size);
    write_bin_file("ofm.bin", reinterpret_cast<char*>(cpu_ofm.data), ofm_size);
    write_external_buffer_json(ofm_size, ifm_size, wgt_size);
#endif // ASM_MODE
#ifdef __AIESIM__
#if !ASM_MODE
#if USE_CERT_LIBRARY
    run_cert_sim(g_compute_graph,
                 reinterpret_cast<void*>(aie_ofm.data), ofm_size,
                 reinterpret_cast<void*>(aie_ifm.data), ifm_size,
                 reinterpret_cast<void*>(aie_wgt.data), wgt_size);
#else
    g_compute_graph.init();
    run_dma_layer_config(g_compute_graph, aie_ofm.data, aie_ifm.data, aie_wgt.data); // NOTE: why do we need wgt here?
    g_compute_graph.end();
#endif //USE_CERT_LIBRARY

    // NOTE: Print function is not defined
    //aie_ifm.print("IFM =\n");
    //aie_wgt.print("WGT =\n");
    //cpu_ofm.print("CPU OFM =\n");

    // Compare the device output against the host reference.
    // NOTE(review): epsilon and ofm_sign are forwarded to cmp_tensor(); their
    // exact meaning (presumably tolerance and signedness) is defined there.
    int epsilon = 1;
    int ofm_sign = 1;
    int err = cmp_tensor(cpu_ofm, aie_ofm, ofm_sign, epsilon);
    printf("Error Count = %d\n", err);
    if (err > 0) {
        printf("DI_FAIL: Y=%d X=%d C=%d\n", Yi, Xi, Ci);
    } else {
        printf("DI_PASS: Y=%d X=%d C=%d\n", Yi, Xi, Ci);
    }
#endif // !ASM_MODE
#endif // __AIESIM__

    // Each buffer is released with the deallocator matching its allocator.
#if !ASM_MODE
    adf::GMIO::free(aie_ifm.data);
    adf::GMIO::free(aie_wgt.data);
    adf::GMIO::free(aie_ofm.data);
#else
    free(aie_ifm.data);
    free(aie_wgt.data);
#endif // !ASM_MODE
    free(cpu_ofm.data);

    // NOTE(review): this aborts the process unconditionally unless NDEBUG is
    // defined, so the return below is only reached in NDEBUG builds. It looks
    // like a deliberate way to end the run - confirm before removing.
    assert(false);
    return 0;
}

int main(void)
{
    auto cfg = load_json("gap_cfg.json");
    int const Ci = extract_json(cfg, "C_IN");
    int const Yi = extract_json(cfg, "Y_IN");
    int const Xi = extract_json(cfg, "X_IN");
    int const Co = extract_json(cfg, "C_OUT");
    int const Yo = extract_json(cfg, "Y_OUT");
    int const Xo = extract_json(cfg, "X_OUT");
    // NOTE: shift will change for each layer; and will be packed them in wgts otherwise we dont need to pass weights
    int const shift = extract_json(cfg, "SHIFT");
    int const scale = extract_json(cfg, "SCALE");

    int const read_ifm = extract_json(cfg, "READ_IFM");
    int const read_wgt = extract_json(cfg, "READ_WGT");

    printf("IFM dimension: CIN x YIN x XIN = %d x %d x %d \n", Ci, Yi, Xi);
    printf("OFM dimension: COUT x YOUT x XOUT = %d x %d x %d \n", Co, Yo, Xo);

    run_globalavgpool_a8(Ci, Yi, Xi, Co, Yo, Xo, shift, scale, read_ifm, read_wgt);

    return 0;
}
