#ifndef __TXNRT__
#include <adf.h>
#include <adf/adf_api/AIERuntimeControl.h>
#include "super.hh"
#include "graph.hpp"
#endif // __TXNRT__
#if defined(__AIESIM__) || defined(__TXNRT__)
#include "dma.hpp"
#endif // __AIESIM__ || __TXNRT__

using namespace std;

#include <cassert>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include "tensor.hpp"

/**
 * Write a raw byte buffer to a file in binary mode, replacing any existing
 * file content.
 *
 * Failures (file cannot be opened, or the write does not complete) are
 * reported on std::cerr; the function then returns without throwing, so a
 * caller that treats the dump as best-effort keeps running.
 *
 * @param filename  Path of the file to create or overwrite.
 * @param data      Pointer to the first byte to write; may be null only when
 *                  size is 0.
 * @param size      Number of bytes to write from data.
 */
void write_bin_file(std::string filename, char* data, size_t size) {
    // Output-only stream: this helper never reads the file back.
    std::ofstream file(filename, std::ios::out | std::ios::binary | std::ios::trunc);
    if (!file.is_open()) {
        std::cerr << "write_bin_file: cannot open '" << filename << "' for writing\n";
        return;
    }

    // Skip the write for an empty payload so a null data pointer is never
    // handed to ostream::write.
    if (size > 0 && data != nullptr) {
        file.write(data, static_cast<std::streamsize>(size));
    }

    // Flush explicitly so a short write (e.g. disk full) is detected here
    // instead of being lost in the destructor.
    file.flush();
    if (!file) {
        std::cerr << "write_bin_file: failed to write " << size
                  << " bytes to '" << filename << "'\n";
    }
}

#ifndef __TXNRT__
// File-scope graph instance, compiled only when __TXNRT__ is not defined.
// main() calls init()/end() on it and passes it to run_dma_layer_config().
// ComputeGraph is declared outside this file (presumably in graph.hpp — confirm).
ComputeGraph g_compute_graph;
#endif // __TXNRT__

/**
 * Test-bench entry point.
 *
 * Steps:
 *   1. Allocate input (IFM), weight (WGT) and output (OFM) buffers, plus a
 *      host-only buffer for the CPU reference output.
 *   2. Fill the input tensor with random data and compute the reference
 *      output with cpu_nni().
 *   3. __TXNRT__ builds: generate the DMA binaries and dump ifm/wgt/ofm to
 *      .bin files (ofm.bin holds the CPU reference output).
 *      __AIESIM__ builds: run the compute graph, print the tensors and
 *      compare the graph output against the CPU reference.
 *   4. Release all buffers.
 *
 * Shape and configuration values come from the build-time macros H_IN, W_IN,
 * C_IN, H_OUT, W_OUT, C_OUT and NUM_INTERPOLATIONS.
 *
 * @return 0 on completion, 1 if a buffer allocation fails.
 */
int main() {
    // Fixed seed; presumably rand_tensor() draws from rand(), making the
    // generated input reproducible — confirm in tensor.hpp.
    srand(0xABCD);
    using Telem = int16_t;

    int constexpr h_in = H_IN;
    int constexpr w_in = W_IN;
    int constexpr c_in = C_IN;
    int constexpr h_out = H_OUT;
    int constexpr w_out = W_OUT;
    int constexpr c_out = C_OUT;
    int constexpr num_interpolations = NUM_INTERPOLATIONS;

    // Byte sizes are computed and kept in size_t so large shapes neither
    // overflow the int multiplication nor get narrowed on assignment.
    size_t const ifm_size = static_cast<size_t>(h_in) * w_in * c_in * sizeof(Telem);
    // NOTE(review): 1-byte placeholder; this layer appears to have no real
    // weights but the DMA configuration still takes a weight pointer — confirm.
    size_t const wgt_size = 1;
    size_t const ofm_size = static_cast<size_t>(h_out) * w_out * c_out * sizeof(Telem);

#ifdef __TXNRT__
    auto aie_ifm = static_cast<Telem*>(malloc(ifm_size));
    auto aie_wgt = static_cast<Telem*>(malloc(wgt_size));
    auto aie_ofm = static_cast<Telem*>(malloc(ofm_size));
#else
    auto aie_ifm = static_cast<Telem*>(adf::GMIO::malloc(ifm_size));
    auto aie_wgt = static_cast<Telem*>(adf::GMIO::malloc(wgt_size));
    auto aie_ofm = static_cast<Telem*>(adf::GMIO::malloc(ofm_size));
#endif // __TXNRT__
    auto cpu_ofm = static_cast<Telem*>(malloc(ofm_size));

    // Single release path shared by the error exit and the normal exit. Each
    // buffer goes back to the allocator it came from.
    auto release_buffers = [&]() {
#ifdef __TXNRT__
        free(aie_ifm);
        free(aie_wgt);
        free(aie_ofm);
#else
        // Null pointers are skipped because SOURCE does not show whether
        // adf::GMIO::free accepts nullptr.
        if (aie_ifm != nullptr) { adf::GMIO::free(aie_ifm); }
        if (aie_wgt != nullptr) { adf::GMIO::free(aie_wgt); }
        if (aie_ofm != nullptr) { adf::GMIO::free(aie_ofm); }
#endif // __TXNRT__
        free(cpu_ofm);
    };

    if (aie_ifm == nullptr || aie_wgt == nullptr || aie_ofm == nullptr || cpu_ofm == nullptr) {
        std::cerr << "ERROR: buffer allocation failed\n";
        release_buffers();
        return 1;
    }

    // Start from defined contents: the weight buffer is never filled below
    // yet is dumped to wgt.bin / handed to the DMA configuration, and the
    // input buffer is only filled when INT_16 or BFLOAT_16 is set.
    std::memset(aie_ifm, 0, ifm_size);
    std::memset(aie_wgt, 0, wgt_size);
    std::memset(aie_ofm, 0, ofm_size);

    // Tensor objects wrap the raw buffers allocated above.
    Tensor<Telem> ifm(h_in, w_in, c_in, aie_ifm);
    Tensor<Telem> aie_out_mat(h_out, w_out, c_out, aie_ofm);
    Tensor<Telem> cpu_out_mat(h_out, w_out, c_out, cpu_ofm);

#if INT_16
    rand_tensor(ifm, -32768, 32767, 1);
#elif BFLOAT_16
    rand_tensor(ifm, -128, 128, 0);
#endif

    // CPU reference result, later dumped (TXNRT) or compared (AIESIM).
    cpu_nni(ifm, cpu_out_mat, num_interpolations);

#if defined(__AIESIM__) || defined(__TXNRT__)
#ifdef __TXNRT__
    DmaBins bins = run_dma_layer_config();
    bins.save();
    write_bin_file("ifm.bin", reinterpret_cast<char*>(aie_ifm), ifm_size);
    write_bin_file("wgt.bin", reinterpret_cast<char*>(aie_wgt), wgt_size);
    // ofm.bin carries the CPU reference output, not aie_ofm.
    write_bin_file("ofm.bin", reinterpret_cast<char*>(cpu_ofm), ofm_size);
#else
    g_compute_graph.init();
    run_dma_layer_config(g_compute_graph, aie_ofm, aie_ifm, aie_wgt);
    g_compute_graph.end();

    print_matrix(ifm, "AIE IFM =\n", INT_16);
    print_matrix(cpu_out_mat, "CPU OFM =\n", INT_16);
    print_matrix(aie_out_mat, "AIE OFM =\n", INT_16);

    // NOTE(review): PASS is printed when check_result() yields a value that
    // converts to false — presumably it returns a mismatch count; confirm
    // against its definition.
    if (!(check_result(cpu_out_mat, aie_out_mat, INT_16))) {
        printf("DI: PASS\n");
    }
    else {
        printf("DI: FAIL\n");
    }
#endif // __TXNRT__
#endif // __AIESIM__ || __TXNRT__

    release_buffers();

#ifndef __TXNRT__
    // NOTE(review): unconditional assert kept from the original; unless
    // NDEBUG is defined it aborts every non-TXNRT run here. Presumably a
    // deliberate way to force the simulator to exit — confirm.
    assert(false);
#endif // __TXNRT__
    return 0;
}
