#pragma once

#include <fstream>
#include <iostream>
#include <map>
#include <tuple>
#include <utility>
#include "op_common.hpp"
#include "support_ops.hpp"

namespace waic_runner {
    /**
     * @brief Declaration of the subgraph operator class template.
     *
     * Only declarations are visible in this header: apart from the trivial
     * verbose accessors, every member function is defined elsewhere.
     *
     * @tparam InT  presumably the input element type  -- TODO confirm
     * @tparam WtT  presumably the weight element type -- TODO confirm
     * @tparam OutT presumably the output element type -- TODO confirm
     */
    template <typename InT, typename WtT, typename OutT>
    class subgraph_op {
    private:
        // Shape / format / datatype descriptions kept by the operator.
        std::vector<int64_t> inputShape_;
        std::vector<int64_t> outputShape_;
        std::vector<std::string> input_format_;
        std::vector<std::string> output_format_;
        std::vector<std::string> input_datatype_;
        std::vector<std::string> output_datatype_;

        // Fixed-size index tables and counters. They are value-initialized
        // here so that none of them holds an indeterminate value if the
        // constructor (defined elsewhere) does not assign it explicitly.
        size_t onnx_arg_idx_[3]{};
        size_t BO_seq_[5]{};
        size_t number_inputs_{};
        size_t number_outputs_{};
        size_t input_shape_size_{};
        size_t output_shape_size_{};

        std::string layer_name_;
        std::string tkey_{};
        std::string bkend_{};
        std::string conv_special_flag_{};
        std::string binfile_path_{};
        std::string prebuilt_bin_dir_{};
        json tilings_data_{};
        bool use_inmem_ = false;
        bool verbose_ = false;
        Support_OpType support_optype_ = NPUOP;

        /// Builds an instruction key from @p prefix (definition not in this header).
        std::string get_instr_key(std::string prefix) const;

    public:
        /**
         * @brief Constructs the operator.
         *
         * @param op_type          operator type name
         * @param binfile_path     path string; presumably stored in binfile_path_ -- TODO confirm
         * @param prebuilt_bin_dir directory string; presumably stored in prebuilt_bin_dir_ -- TODO confirm
         * @param tilings_data     JSON data; presumably stored in tilings_data_ -- TODO confirm
         * @param attr             attribute map keyed by name
         * @param use_inmem        flag; presumably stored in use_inmem_ -- TODO confirm
         */
        subgraph_op(const std::string& op_type,
            const std::string& binfile_path,
            const std::string& prebuilt_bin_dir,
            const json& tilings_data,
            const std::map<std::string, std::any>& attr,
            bool use_inmem);

        // All members manage their own resources, so the compiler-generated
        // destructor is sufficient.
        ~subgraph_op() = default;

        /// Processes constant parameters through @p io (definition elsewhere).
        void initialize_const_params(
            ConstBufferIO& io, const std::vector<Tensor>& const_params,
            const std::map<std::string, std::any>& attr = {});

        // The getters below share one signature shape: input tensors, output
        // tensors and an optional attribute map. None of them modifies the
        // operator (all are const member functions).

        /// Returns the transaction binary as a byte vector.
        const std::vector<uint8_t> get_transaction_bin(
            std::vector<Tensor>& input, std::vector<Tensor>& output,
            const std::map<std::string, std::any>& attr = {}) const;
        /// Returns the super-kernel parameters as a byte vector.
        const std::vector<uint8_t> get_super_kernel_params(
            std::vector<Tensor>& input, std::vector<Tensor>& output,
            const std::map<std::string, std::any>& attr = {}) const;
        /// Returns the buffer requirements as a list of OpArgMap entries.
        std::vector<OpArgMap> get_buffer_reqs(
            std::vector<Tensor>& input, std::vector<Tensor>& output,
            const std::map<std::string, std::any>& attr = {}) const;
        /// Returns the control-packet patch information.
        std::vector<CtrlPktPatchInfo> get_ctrl_pkt_patch_info(
            std::vector<Tensor>& input, std::vector<Tensor>& output,
            const std::map<std::string, std::any>& attr = {}) const;
        /// Returns the control packets as a byte vector.
        std::vector<uint8_t> get_ctrl_pkts(
            std::vector<Tensor>& input, std::vector<Tensor>& output,
            const std::map<std::string, std::any>& attr = {}) const;

        /**
         * @brief Formats one output tensor.
         *
         * @param out_tensor  tensor descriptor
         * @param hw_out_ptr  raw buffer pointer; presumably the hardware-side
         *                    output buffer -- TODO confirm
         * @param sz          size associated with the buffer (units not visible here)
         * @param tensor_idx  index of the tensor being formatted
         * @param attr        optional attribute map
         */
        void format_output(const Tensor& out_tensor, void* hw_out_ptr, size_t sz,
            size_t tensor_idx, const std::map<std::string, std::any>& attr = {});
        /**
         * @brief Formats one input tensor; parameters mirror format_output().
         */
        void format_input(const Tensor& in_tensor, void* hw_in_ptr, size_t sz,
            size_t tensor_idx, const std::map<std::string, std::any>& attr = {});

        // Shape helpers. Each takes the shape vector by value.
        /// Extracts three dimensions from @p shape (presumably B, M, N -- TODO confirm).
        std::tuple<size_t, size_t, size_t> extract_BMN(
            const std::vector<size_t> shape);
        /// NOTE(review): returns three values although the name mentions only
        /// M and N -- confirm the meaning of each element against the definition.
        std::tuple<size_t, size_t, size_t> extract_MN(
            const std::vector<size_t> shape);
        /// Extracts four dimensions from @p shape (presumably N, H, W, C -- TODO confirm).
        std::tuple<size_t, size_t, size_t, size_t> extract_NHWC(
            const std::vector<size_t> shape);
        /// Returns a padded shape for the tensor selected by
        /// @p tensor_number / @p tensor_idx (exact selection rule not visible here).
        std::vector<size_t> get_pad_shape(const std::vector<int64_t> shape,
            size_t tensor_number, size_t tensor_idx);

        /// Converts a 16-bit value to float (presumably a bfloat16 bit pattern -- TODO confirm).
        float bfloat_to_float(uint16_t x);
        /// Converts a float to a 16-bit value (presumably bfloat16; rounding
        /// behaviour is not visible here).
        uint16_t float_to_bfloat16_1(float x);

        /// Returns the current verbose flag.
        bool get_verbose() const { return verbose_; }
        /// Sets the verbose flag.
        void set_verbose(const bool verbose) { verbose_ = verbose; }
    };

} // namespace waic_runner
