
#include <map>
#include <unordered_map>
#include <boost/algorithm/string.hpp>
#include <vector>
#include <mutex>
#include <chrono>
#include <iostream>
#include <fstream>
#include "ThreadPool.h"
#include "evidence.hpp"
#include "external.hpp"
#include "functions.hpp"

using namespace std;

void check_arlequin()
{
    
    v_vcf = v_input;
    v_out = v_output;
    
    screen_message (screen_size, 0, "", 1, 0);
    screen_message (screen_size, 0, Program_name + "::arlequin" , 1, v_quiet);
    screen_message (screen_size, 2, "Loading data ...", 2, v_quiet);
    
    vector<string> head;
    vector<string> subdata;
    vector<string> genotypes;
    vector<string> positions;
    std::map <pair<string,string>, string> snp_data;
    std::map <string,int> samples;
    
    int head_start = 0;
    int sample_size = 0;
    int snps = 0;
    
    ifstream input( v_vcf );
    for( std::string line; getline( input, line ); )
    {
        if (line.substr(0,2) == "##"){continue;}
        if (line.substr(0,4) == "#CHR")
        {
            boost::split(head,line,boost::is_any_of("\t"));
            for(vector<string>::iterator sample = head.begin();sample!=head.end();++sample)
            {
                sample_size++;
                if (*sample == "FORMAT"){head_start = sample_size;}
            }
            continue;
        }
        
        boost::split(subdata,line,boost::is_any_of("\t"));
 
        int start_int = 0;
        if (vstart != "0")
        {
            try {
                start_int = stoi(vstart);
            }
            catch (const std::exception& e) {
                start_int = 0;
            }
        }
        
        int end_int = 0;
        if (vend != "0")
        {
            try {
                end_int = stoi(vend);
            }
            catch (const std::exception& e) {
                end_int = 0;
            }
        }
        
        
        if (v_chr != "")
        {
            if (v_chr != subdata[0]){continue;}
        }
        if (start_int > 0)
        {
            if (stoi(subdata[1]) < start_int){continue;}
        }
        if (end_int > 0)
        {
            if (stoi(subdata[1]) > end_int) {break;}
        }
        
        
        
        if ((subdata[0] != v_chr) && (v_chr != "")){continue;}
        if ((stoi(subdata[1]) < stoi(vstart)) && (vstart != "0")) {continue;}
        if ((stoi(subdata[1]) > stoi(vend) ) && (vend != "0")) {continue;}
        
        snps++;
        positions.push_back (subdata[1]);
        
        for (int a = head_start; a < sample_size; a++)
        {
            boost::split(genotypes,subdata[a],boost::is_any_of(":"));
            
            
            pair <string,string> key = make_pair(head[a],subdata[1]);
            snp_data[key] = genotypes[0];
            
        }
    }
    input.close();
    
    
    if (head_start == 0) { warnings.push_back("This VCF file does not have a FORMAT field. Please check the VCF file"); return;}
    if (snps == 0) { warnings.push_back("No data following these parameters"); return;}
    
    
    for (int a = head_start; a < sample_size; a++)
    {
        samples[head[a]] = 1;
    }
    screen_message (screen_size, 2, "Loading data ... done", 1, v_quiet);
    screen_message (screen_size, 2, "Printing ARP file ... ", 2, v_quiet);

    
    
    
    if (v_pop == "") {
        
        
        ofstream myfile;
        myfile.open (v_output);
        
        myfile << "[Profile]\nTitle = \"";
        myfile << v_input << "\"\n";
        myfile << "NbSamples = 1\nDataType = STANDARD\nGenotypicData = 1\nLocusSeparator = WHITESPACE\nMissingData = \"?\"\nGameticPhase = 0\nRecessiveData = 0\n";
        
        myfile << "\n[Data]\n[[Samples]]\nSampleName = \"" << v_input << "\"\n";
        myfile << "SampleSize = ";
        myfile << (sample_size - head_start);
        myfile << endl;
        
        myfile << "SampleData = {\n";
        
        
        vector<string> helper;
        
        
        for (int a = head_start; a < sample_size; a++)
        {
            myfile << head[a] << "  1 ";
            string line1;
            string line2;
            for(vector<string>::iterator pos = positions.begin();pos!=positions.end();++pos)
            {
                pair <string,string> key = make_pair(head[a],*pos);
                string gen = snp_data[key];
                if (gen == ".") {gen = "./.";}

                std::replace( gen.begin(), gen.end(), '|', '/'); // replace all '|' to '/'
                boost::split(helper,gen,boost::is_any_of("/"));
                
                if (helper[0] != ".") {line1 = line1 + " " + helper[0];}
                if (helper[0] == ".") {line1 = line1 + " ?";}
                
                
                if (helper[1] != ".") {line2 = line2 + " " + helper[1];}
                if (helper[1] == ".") {line2 = line2 + " ?";}
                
            }
            
            int s = head[0].size();
            s = s + 5;
            line2.insert(0,s,' ');
            myfile << line1 << endl;
            myfile << line2 << endl;
            
            
        }
        myfile << "}\n";
        myfile.close();
    }
    
    
    
    if (v_pop != "") {
        
        vector<string> pop_data;
        std::map <string, string> pop;
        
        ifstream input( v_pop );
        for( std::string line; getline( input, line ); )
        {
            boost::split(pop_data,line,boost::is_any_of(","));
            if (pop[pop_data[1]] == "") {pop[pop_data[1]] = pop_data[0];}
            else {pop[pop_data[1]] = pop[pop_data[1]] + "," + pop_data[0];}
        }
        input.close();
        
        int pop_number = 0;
        std::map <string, int> pop_size;
        vector<string> helper;
        
        for (std::map<string, string>::iterator i = pop.begin(); i != pop.end(); ++i)
        {
            pop_number++;
            
            boost::split(helper,pop[i->first],boost::is_any_of(","));
            for(vector<string>::iterator sample_id = helper.begin();sample_id!=helper.end();++sample_id)
            {
                if (samples[*sample_id] == 1) {pop_size[i->first]++;}
            }
            
        }
        
        
        
        ofstream myfile;
        myfile.open (v_output);
        
        
        myfile << "[Profile]\nTitle = \"";
        myfile << v_input << "\"\n";
        myfile << "NbSamples = ";
        myfile << pop_number;
        myfile << endl;
        myfile << "DataType = STANDARD\nGenotypicData = 1\nLocusSeparator = WHITESPACE\nMissingData = \"?\"\nGameticPhase = 0\nRecessiveData = 0\n";
        
        myfile << "\n[Data]\n[[Samples]]\n";
        
        
        for (std::map<string, string>::iterator i = pop.begin(); i != pop.end(); ++i)
        {
            myfile << "SampleName = \"" << i->first << "\"\n";
            myfile << "SampleSize = ";
            myfile << pop_size[i->first];
            myfile << endl;
            myfile << "SampleData = {\n";
            
            boost::split(helper,pop[i->first],boost::is_any_of(","));
            
            
            for(vector<string>::iterator sample_id = helper.begin();sample_id!=helper.end();++sample_id)
            {
                
                if (samples[*sample_id] == 1)
                {
                    
                    myfile << *sample_id << "  1 ";
                    string line1;
                    string line2;
                    for(vector<string>::iterator pos = positions.begin();pos!=positions.end();++pos)
                    {
                        pair <string,string> key = make_pair(*sample_id,*pos);
                        string gen = snp_data[key];
                        std::replace( gen.begin(), gen.end(), '|', '/'); // replace all '|' to '/'
                        vector<string> alleles;
                        
                        boost::split(alleles,gen,boost::is_any_of("/"));
                        
                        
                        
                        if (alleles[0] != ".") {line1 = line1 + " " + alleles[0];}
                        if (alleles[0] == ".") {line1 = line1 + " ?";}
                        
                        
                        if (alleles[1] != ".") {line2 = line2 + " " + alleles[1];}
                        if (alleles[1] == ".") {line2 = line2 + " ?";}
                        
                    }
                    
                    
                    int s = head[0].size();
                    s = s + 5;
                    line2.insert(0,s,' ');
                    myfile << line1 << endl;
                    myfile << line2 << endl;
                }
                
            }
            myfile << "}\n";
        }
    }
    
    screen_message (screen_size, 2, "Printing ARP file ... done", 1, v_quiet);
    screen_message (screen_size, 5, "", 1, v_quiet);
    screen_message (screen_size, 5, "If you are dealing with a phased VCF using '|' to indicate the phase", 1, v_quiet);
    screen_message (screen_size, 5, "you may change 'GameticPhase = 0' to 'GameticPhase = 1'", 1, v_quiet);
    screen_message (screen_size, 5, "", 1, v_quiet);
    return;
    
    
}


// Prints the usage screen of the arlequin sub-command: a banner with the
// program name, author, contact and version, the list of options, and
// finally whatever PrintWarnings() reports.
void help_arlequin ()
{
    // Banner
    screen_message (screen_size, 0, "", 1, 0);
    screen_message (screen_size, 0, Program_name + "::arlequin" , 1, 0);
    screen_message (screen_size, 0, "", 1, 0);

    // Program information
    screen_message (screen_size, 2, "* Author  : " + Program_author, 1, 0);
    screen_message (screen_size, 2, "* Contact : " + Program_contact, 1, 0);
    screen_message (screen_size, 2, "* Version : " + Program_version, 1, 0);
    screen_message (screen_size, 0, "", 1, 0);

    // Option list, printed one entry per line in this order
    screen_message (screen_size, 2, "Options", 1, 0);
    const std::string option_lines[] = {
        "input      the input VCF file [mandatory]",
        "output     the ARP file to be created",
        "chr        the chromosome to be considered",
        "start      start processing from this position",
        "end        process variants to this position",
        "pop        file with sample definition, one per line. E.g. HG000096,CEU",
        "--quiet    quiet mode"
    };
    for (const std::string& option_line : option_lines)
    {
        screen_message (screen_size, 5, option_line, 1, 0);
    }
    screen_message (screen_size, 0, "", 1, 0);

    PrintWarnings();
}

// Entry point of the arlequin sub-command.
// Checks that the input file (and the population file, when one was given)
// exists, defaults the output name to the input name with an ".arp"
// extension, then runs the conversion. On a missing file a warning is
// recorded and the help screen is shown instead.
void main_arlequin ()
{
    if (! fileExists(v_input))
    {
        warnings.push_back("The input file could not be found.");
        help_arlequin();
        return;
    }

    // The population file is optional; it is only checked when provided.
    const bool pop_requested = (v_pop != "");
    if (pop_requested && ! fileExists(v_pop))
    {
        warnings.push_back("The POP file could not be found.");
        help_arlequin();
        return;
    }

    if (v_output == "")
    {
        v_output = GetFileNameWithoutExtension (v_input) + ".arp";
    }

    check_arlequin();
}
