
#include <chrono>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <map>
#include <mutex>
#include <unordered_map>
#include <vector>

#include <boost/algorithm/string.hpp>

#include "external.hpp"
#include "functions.hpp"
#include "fasta.hpp"


using namespace std;

// Banner-suppression flag: check_fasta() prints its "::fasta" title only while
// this is 0; check_transcript() sets it to 1 before calling check_fasta().
int notitle = 0;

void check_transcript ()
{
    screen_message (screen_size, 0, "", 1, 0);
    screen_message (screen_size, 0, Program_name + "::transcript" , 1, v_quiet);

    screen_message (screen_size, 2, "Loading BED file ...", 2, v_quiet);
    ifstream bed( v_bed );
    vector <string> line_data;
    map <int,string> bed_data;
    for( std::string line; getline( bed, line ); )
    {
        if (line == "") {continue;}
        boost::split(line_data,line,boost::is_any_of("\t"));
        int start = stoi(line_data[1]);
        bed_data[start] = line;
    }
    bed.close();
    screen_message (screen_size, 2, "Loading BED file ... done", 1, v_quiet);

    
    string v_out_original = v_output;
    notitle = 1;
    for(auto &&item: bed_data)
    {
        vector <string> line_data;
        boost::split(line_data,item.second,boost::is_any_of("\t"));
        v_chr = line_data[0];
        vstart = line_data[1];
        vend = line_data[2];
        screen_message (screen_size, 2, "Exporting segment " + v_chr + ":" + vstart + "-" + vend + " ...", 1, v_quiet);
        v_output = v_out_original + "." + line_data[1];
        check_fasta();
    }
 
    screen_message (screen_size, 2, "Joining file ... ", 2, v_quiet);
    map <string,string> fasta_data;
    for(auto &&item: bed_data)
    {
        string file = v_out_original + "." + to_string(item.first);
        ifstream fasta( file );
        vector <string> data;
        for( std::string line; getline( fasta, line ); )
        {
            string id = line.substr(1);
            getline( fasta, line );
            string seq = line;
            
            fasta_data[id] = fasta_data[id] + seq;
        }
        fasta.close();
        string res = GetStdoutFromCommand("rm " + file);
    }
    
 
    ofstream fasta;
    fasta.open (v_out_original);
    for(auto &&item: fasta_data)
    {
        fasta << ">" << item.first << endl;
        fasta << item.second << endl;
    }
    fasta.close();
    screen_message (screen_size, 2, "Joining file ... done", 1, v_quiet);
    screen_message (screen_size, 2, "Output file: " + v_out_original, 1, v_quiet);

    
}


void check_fasta()
{
  
    if (notitle == 0) {
        screen_message (screen_size, 0, "", 1, 0);
        screen_message (screen_size, 0, Program_name + "::fasta" , 1, v_quiet);
    }
    
    int $higher = 0;
    int $lower = 0;
    if (((vstart == "0") || (vend == "0")))
    {
        vector<string> line_data;
        ifstream input( v_input );
        for( std::string line; getline( input, line ); )
        {
            if (line.substr(0,1) == "#"){continue;}
            if (line == ""){continue;}
            boost::split(line_data,line,boost::is_any_of("\t"));
            if ($higher == 0) {$higher = stoi(line_data[1]);}
            if ($lower == 0) {$lower = stoi(line_data[1]);}
            if (stoi(line_data[1]) >= $higher){$higher = stoi(line_data[1]);}
        }
        input.close();
    }
    if (vstart != "0") {$lower = stoi(vstart);}
    if (vend != "0") {$higher = stoi(vend);}

    
    screen_message (screen_size, 2, "Loading reference ...", 2, v_quiet);
    string sequence = "";
    ifstream input( v_reference );
    for( std::string line; getline( input, line ); )
    {
        if (line.substr(0,1) == ">"){continue;}
        if (line.substr(0,1) == ""){continue;}
        sequence += line;
    }
    input.close();
    
    sequence = sequence.substr($lower-1, ($higher - $lower)+1);
    
    if ((sequence == "") || (sequence.size() < ($higher - $lower)))
    {
        warnings.push_back("It was not possible to retrieve a proper reference sequence");
        PrintWarnings();
        return;
    }
    screen_message (screen_size, 2, "Loading reference ... done", 1, v_quiet);

    screen_message (screen_size, 2, "Loading vcf data ...", 2, v_quiet);
    //loading vcf data
    vector<string> head;
    vector<string> subdata;
    vector<string> genotypes;
    vector<string> positions;
    std::map <pair<string,string>, string> snp_data;
    std::map <string,int> samples;
    std::map <pair<string,string>,string> alt_data;
    
    int snps = 0;
    int format_pos = 0;
    int ref_pos = 0;
    int alt_pos = 0;
    
    ifstream vcf( v_input );
    for( std::string line; getline( vcf, line ); )
    {
        if (line == "") {continue;}
        if (line.substr(0,2) == "##"){continue;}
        if (line.substr(0,4) == "#CHR")
        {
            boost::split(head,line,boost::is_any_of("\t"));
            int a = 0;
            for(vector<string>::iterator sample = head.begin();sample!=head.end();++sample)
            {
                if (*sample == "REF"){ref_pos = a;}
                if (*sample == "ALT"){alt_pos = a;}
                if (*sample == "FORMAT"){format_pos = a;break;}
                a++;
            }
            continue;
        }
        
        boost::split(subdata,line,boost::is_any_of("\t"));
        if ((subdata[0] != v_chr) && (v_chr != "")){continue;}
        if ((stoi(subdata[1]) < stoi(vstart)) && (vstart != "0")) {continue;}
        if ((stoi(subdata[1]) > stoi(vend) ) && (vend != "0")) {continue;}
        
        positions.push_back (subdata[1]);
        snps++;
        
        vector <string> alternatives;
        boost::split(alternatives,subdata[alt_pos],boost::is_any_of(","));
        
        pair <string,string> key = make_pair(subdata[1],"0");
        alt_data[key] = subdata[ref_pos];
        
        int a = 1;
        for(vector<string>::iterator alt = alternatives.begin();alt!=alternatives.end();++alt)
        {
            pair <string,string> key = make_pair(subdata[1],to_string(a));
            alt_data[key] = *alt;
            a++;
        }
        
        for (a = format_pos + 1; a < subdata.size(); a++)
        {
            boost::split(genotypes,subdata[a],boost::is_any_of(":"));
            pair <string,string> key = make_pair(head[a],subdata[1]);
            snp_data[key] = genotypes[0];
            
        }
    }
    vcf.close();
    screen_message (screen_size, 2, "Loading vcf data ... done", 1, v_quiet);



    if (format_pos == 0) { warnings.push_back ("It was not possible to detect the FORMAT field. Please check the VCF file."); PrintWarnings(); return;}
    if (snps == 0) {
        warnings.push_back ("No variants within this segment and all samples will present the same reference sequence.");
    }

    screen_message (screen_size, 2, "Creating fasta file ...", 2, v_quiet);

    
    for (int a = format_pos + 1; a < subdata.size(); a++)
    {
        samples[head[a]] = 1;
    }
    
    ofstream myfile;
    myfile.open (v_output);
    
    for (int a = format_pos + 1; a < subdata.size(); a++)
    {
        myfile << ">" << head[a] << "_h1\n";
        string draft = sequence;
        int correct = 0;
        for(vector<string>::iterator pos = positions.begin();pos!=positions.end();++pos)
        {
            pair <string,string> key = make_pair(head[a],*pos);
            string gen = snp_data[key];
            vector<string> alleles;
            boost::split(alleles,gen,boost::is_any_of("|"));
            
            if (alleles.size() == 2)
            {
                if (alleles[0] == "0") {continue;}
                else
                {
                    pair <string,string> key1 = make_pair(*pos,"0");
                    pair <string,string> key2 = make_pair(*pos,alleles[0]);
                    
                    if (alt_data[key2] == "*") {continue;}
                    
                    int b = stoi(*pos)+correct-$lower;
                    
                    draft.replace(b,alt_data[key1].size(),alt_data[key2]);
                    correct = correct + (alt_data[key2].size() - alt_data[key1].size());
                }
            }
        }
        myfile << draft << endl;
        
        
        myfile << ">" << head[a] << "_h2\n";
        draft = sequence;
        correct = 0;
        for(vector<string>::iterator pos = positions.begin();pos!=positions.end();++pos)
        {
            pair <string,string> key = make_pair(head[a],*pos);
            string gen = snp_data[key];
            vector<string> alleles;
            boost::split(alleles,gen,boost::is_any_of("|"));
            
            if (alleles.size() == 2)
            {
                
                if (alleles[1] == "0") {continue;}
                else
                {
                    pair <string,string> key1 = make_pair(*pos,"0");
                    pair <string,string> key2 = make_pair(*pos,alleles[1]);
                    
                    if (alt_data[key2] == "*") {continue;}
                    
                    
                    int b = stoi(*pos)+correct-$lower;
                    
                    draft.replace(b,alt_data[key1].size(),alt_data[key2]);
                    correct = correct + (alt_data[key2].size() - alt_data[key1].size());
                }
            }
        }
        myfile << draft << endl;
    }
    myfile.close();
    
    screen_message (screen_size, 2, "Creating fasta file ... done", 1, v_quiet);
}





// Prints the usage screen of the "fasta" command (banner, author/contact/
// version lines and the option list), then prints any pending warnings.
// Every line is emitted with the quiet argument set to 0.
void help_fasta ()
{
    screen_message (screen_size, 0, "", 1, 0);
    screen_message (screen_size, 0, Program_name + "::fasta" , 1, 0);
    screen_message (screen_size, 0, "", 1, 0);
    screen_message (screen_size, 2, "* Author  : " + Program_author, 1, 0);
    screen_message (screen_size, 2, "* Contact : " + Program_contact, 1, 0);
    screen_message (screen_size, 2, "* Version : " + Program_version, 1, 0);
    screen_message (screen_size, 0, "", 1, 0);
    screen_message (screen_size, 2, "Options", 1, 0);
    screen_message (screen_size, 5, "input      the input VCF file [mandatory]", 1, 0);
    screen_message (screen_size, 5, "reference  a reference sequence in fasta format [mandatory]", 1, 0);
    screen_message (screen_size, 5, "output     the fasta file to be created", 1, 0);
    screen_message (screen_size, 5, "chr        the chromosome to be considered", 1, 0);
    screen_message (screen_size, 5, "start      start processing from this position", 1, 0);
    screen_message (screen_size, 5, "end        process variants to this position", 1, 0);
    screen_message (screen_size, 5, "--quiet    quiet mode", 1, 0);
    screen_message (screen_size, 0, "", 1, 0);
    PrintWarnings();
}


// Prints the usage screen of the "transcript" command (banner, author/contact/
// version lines and the option list), then prints any pending warnings.
// Every line is emitted with the quiet argument set to 0.
void help_transcript ()
{
    screen_message (screen_size, 0, "", 1, 0);
    screen_message (screen_size, 0, Program_name + "::transcript" , 1, 0);
    screen_message (screen_size, 0, "", 1, 0);
    screen_message (screen_size, 2, "* Author  : " + Program_author, 1, 0);
    screen_message (screen_size, 2, "* Contact : " + Program_contact, 1, 0);
    screen_message (screen_size, 2, "* Version : " + Program_version, 1, 0);
    screen_message (screen_size, 0, "", 1, 0);
    screen_message (screen_size, 2, "Options", 1, 0);
    screen_message (screen_size, 5, "input      the input VCF file [mandatory]", 1, 0);
    screen_message (screen_size, 5, "reference  a reference sequence in fasta format [mandatory]", 1, 0);
    screen_message (screen_size, 5, "output     the fasta file to be created", 1, 0);
    screen_message (screen_size, 5, "bed        a BED file with all the segments", 1, 0);
    screen_message (screen_size, 5, "--quiet    quiet mode", 1, 0);
    screen_message (screen_size, 0, "", 1, 0);
    PrintWarnings();
}


// Entry point of the "fasta" command. Shows the help screen with a warning
// when the input or reference file is missing; otherwise fills in a default
// output name (input name without extension + ".fas") when none was given and
// runs check_fasta().
void main_fasta ()
{
    if (! fileExists(v_input))
    {
        warnings.push_back("The input file could not be found.");
        help_fasta();
        return;
    }

    if (! fileExists(v_reference))
    {
        warnings.push_back("The reference fasta file could not be found.");
        help_fasta();
        return;
    }

    if (v_output == "")
    {
        v_output = GetFileNameWithoutExtension (v_input) + ".fas";
    }

    check_fasta();
}




// Entry point of the "transcript" command. Shows the help screen with a
// warning when the input, reference or BED file is missing; otherwise fills in
// a default output name (input name without extension + ".fas") when none was
// given and runs check_transcript().
void main_transcript ()
{
    if (! fileExists(v_input))
    {
        warnings.push_back("The input file could not be found.");
        help_transcript();
        return;
    }

    if (! fileExists(v_reference))
    {
        warnings.push_back("The reference fasta file could not be found.");
        help_transcript();
        return;
    }

    if (! fileExists(v_bed))
    {
        warnings.push_back("The BED file could not be found.");
        help_transcript();
        return;
    }

    if (v_output == "")
    {
        v_output = GetFileNameWithoutExtension (v_input) + ".fas";
    }

    check_transcript();
}
