/***************************************************************************
 *   Copyright (C) 2010 by Minh Anh Thi Nguyen, Tanja Gesell and Arndt von Haeseler   *
 *   minh.anh.nguyen@univie.ac.at, tanja.gesell@univie.ac.at, arndt.von.haeseler@univie.ac.at   *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/
#include "utility.h"

#include <cerrno>
#include <climits>

/*--------------------------------------------------------------*/
/*--------------------------------------------------------------*/
/**
	Print an error message to standard error and terminate the program.
	The message is prefixed with "ERROR: "; the process exits with EXIT_FAILURE.
	@param error the message to report
*/
void outError(const char *error){
	cerr << "ERROR: ";
	cerr << error << endl;
	exit(EXIT_FAILURE);
}

/**
	Print an error message given as a string, then terminate the program.
	Forwards to the C-string overload of outError.
	@param error the message to report
*/
void outError(string error){
	const char *text = error.c_str();
	outError(text);
}

/**
	print double error messages then exit program
*/
void outError(const char *error, const char *msg){
	string str = error;
	str += msg;
	outError(str);
}

/**
	print double error messages then exit program
*/
void outError(const char *error, string msg){
	string str = error;
	str += msg;
	outError(str);
}

/**
	Convert a string to an integer, with error checking.
	The whole string must be a base-10 integer that fits into an int.
	Input without digits, input with trailing characters and values outside
	the range of int are rejected.
	@param str original string
	@return the integer value
	@throw string message naming the offending input
*/
int convert_int(const char *str) throw (string){
	char *endptr = 0;
	// strtol signals overflow of long only through errno, so reset it first
	errno = 0;
	const long value = strtol(str, &endptr, 10);

	const bool noDigits = (endptr == str);    // nothing could be parsed
	const bool trailing = (*endptr != '\0');  // characters left after the number
	// the result is returned as int, so values that only fit into long are errors too
	const bool outOfRange = (errno == ERANGE) || value < INT_MIN || value > INT_MAX;

	if (noDigits || trailing || outOfRange) {
		string err = "Expecting integer, but found \"";
		err += str;
		err += "\" instead";
		throw err;
	}

	return static_cast<int>(value);
}

/**
	Convert a string to a double, with error checking.
	The whole string must be consumed by strtod. Input without a number,
	input with trailing characters and results of infinite magnitude
	(overflow) are rejected.
	@param str original string
	@return the double value
	@throw string message naming the offending input
*/
double convert_double(const char *str) throw (string){
	char *endptr = 0;
	const double value = strtod(str, &endptr);

	const bool noDigits = (endptr == str);            // nothing could be parsed
	const bool overflow = (fabs(value) == HUGE_VAL);  // strtod returns +/-HUGE_VAL on overflow
	const bool trailing = (*endptr != '\0');          // characters left after the number

	if (noDigits || overflow || trailing) {
		string err = "Expecting floating-point number, but found \"";
		err += str;
		err += "\" instead";
		throw err;
	}
	return value;
}

/**
	format an integer as text using stream insertion
	@param number the integer to format
	@return the text produced for number
*/
string convertIntToString(int number)
{
	ostringstream buffer;
	buffer << number;
	return buffer.str();
}

/**
	search for a string in a vector of strings
*/
int searchString(string stringToFind, StringVec strVec)
{
        int vecSize;
        vecSize = strVec.size();
        for (int i=0; i<vecSize; i++){
			if (stringToFind == strVec[i])
			return i;
        }
        return -1;
}

/**
	read a vector of strings from file
	Opens the file, delegates the parsing to the stream overload of
	readStringVec and reports any stream failure through outError.
	@param inFile name of the file to read
	@param retVec (OUT) the strings found in the file
*/
void readStringVec(const char* inFile, StringVec &retVec)
{
	cout << "Reading a set of strings, e.g. taxa names, in " << inFile << "..." <<endl;
	const ios::iostate strictMask = ios::failbit | ios::badbit;
	ifstream input;
	try{
		// with this mask a failing open() raises ios::failure
		input.exceptions(strictMask);
		input.open(inFile);
		readStringVec(input, retVec);
		// reading stops on a failed extraction; reset the state before closing
		input.clear();
		input.exceptions(strictMask);
		input.close();
	} catch (const ios::failure &){
		outError(ERR_READ_INPUT, inFile);
	}
	cout << "Finish reading!" << endl;
}

/**
	read a vector of strings from an input stream
*/
void readStringVec(ifstream &inFile, StringVec &retVec)
{
	StringVec _retVec;
	string currentString;	
	try{				
		while (!inFile.eof())
		{
			//reading each line of the file
			//remove the badbit
			inFile.exceptions (ios::badbit);
			if ( !(inFile >> currentString) ) break;
			//set the failbit again
			inFile.exceptions (ios::failbit | ios::badbit);
			_retVec.push_back(currentString);
		}
	} catch(bad_alloc){
			outError(ERR_NO_MEMORY);
	} catch (const char *str){
			outError(str);
	} catch (char *str){
			outError(str);
	} catch (string str){
			outError(str);
	} catch (ios::failure){
			outError(ERR_READ_INPUT);
	} catch (...){
			outError(ERR_READ_ANY);
	}
	retVec = _retVec;
}

/**
	read a vector of integers from file
*/
/*void readIntVec(const char* inFile, IntVec &retVec)
{
	cout << "Reading a set integers in " << inFile << "..." <<endl;
	ifstream in;
	try{
		in.exceptions (ios::failbit | ios::badbit);
		in.open(inFile);
		readIntVec(in, retVec);
		in.clear();
		in.exceptions (ios::failbit | ios::badbit);
		in.close();
	} catch (ios::failure){
		outError(ERR_READ_INPUT, inFile);
	}
	cout << "Finish reading!" << endl;
}*/

/**
	read a vector of integers from an input stream
*/
/*void readIntVec(ifstream &inFile, IntVec &retVec)
{
	IntVec _retVec;
	string currentString;	
	int currentNumber;
	try{				
		while (!inFile.eof())
		{
			//reading each line of the file
			//remove the badbit
			inFile.exceptions (ios::badbit);
			if ( !(inFile >> currentString) ) break;
			//set the failbit again
			inFile.exceptions (ios::failbit | ios::badbit);
			//convert the string into integer with error checking
			currentNumber = convert_int(currentString.c_str());
			_retVec.push_back(currentNumber);
		}
	} catch(bad_alloc){
			outError(ERR_NO_MEMORY);
	} catch (const char *str){
			outError(str);
	} catch (char *str){
			outError(str);
	} catch (string str){
			outError(str);
	} catch (ios::failure){
			outError(ERR_READ_INPUT);
	} catch (...){
			outError(ERR_READ_ANY);
	}
	retVec = _retVec;
}*/

/**
	Read whitespace-separated integers from a file, column by column.
	Empty lines are skipped. On every other line the first col tokens are
	converted with convert_int; token number k is appended to retMat[k].
	@param filename name of the file to read
	@param col number of columns expected on each line (at least 1)
	@param retMat (OUT) resized to col columns; the numbers read are appended to them
	@throw string if a line holds fewer than col tokens or a token is not an integer
*/
void readIntMatrix(const char* filename, const int col, IntMatrix &retMat)
{
	assert(col >= 1);
	retMat.resize(col, IntVec());

	ifstream input;
	// failbit and badbit raise exceptions while the file is opened
	input.exceptions(ios::failbit | ios::badbit);
	input.open(filename);
	// getline sets failbit at the end of the file, so only badbit may throw while reading
	input.exceptions(ios::badbit);

	string line;
	string token;
	int lineNumber = 1;
	while (!input.eof()) {
		getline(input, line);
		if (!line.empty()) {
			istringstream fields(line);
			for (int column = 0; column < col; ++column) {
				if (!(fields >> token)) {
					ostringstream message;
					message << "Missing numbers on line " << lineNumber;
					throw message.str();
				}
				retMat[column].push_back(convert_int(token.c_str()));
			}
		}
		++lineNumber;
	}
	// clear the end-of-file state, then restore the strict mask before closing
	input.clear();
	input.exceptions(ios::failbit | ios::badbit);
	input.close();
}

/**
	Read a file that contains site indices and double values for site rates.
	Empty lines are skipped. Every other line must start with an integer site
	index followed by the rate of that site.
	@param filename name of the file holding one "index rate" pair per line
	@param index (OUT) indices of the sites, shifted from 1-based (file) to 0-based
	@param rates (OUT) rates of the sites, in the same order as index
	@throw string if a line lacks one of the two numbers or a token cannot be converted
*/
void readSiteRates (const char* filename, IntVec &index, DoubleVec &rates)
{
	index = IntVec();
	rates = DoubleVec();

	ifstream input;
	// failbit and badbit raise exceptions while the file is opened
	input.exceptions(ios::failbit | ios::badbit);
	input.open(filename);
	// getline sets failbit at the end of the file, so only badbit may throw while reading
	input.exceptions(ios::badbit);

	string line;
	string token;
	int lineNumber = 1;
	while (!input.eof()) {
		getline(input, line);
		if (!line.empty()) {
			istringstream fields(line);

			if (!(fields >> token)) {
				ostringstream message;
				message << "Missing numbers on line " << lineNumber;
				throw message.str();
			}
			// sites start at 1 in the input file but at 0 inside the program
			index.push_back(convert_int(token.c_str()) - 1);

			if (!(fields >> token)) {
				ostringstream message;
				message << "Missing numbers on line " << lineNumber;
				throw message.str();
			}
			rates.push_back(convert_double(token.c_str()));
		}
		++lineNumber;
	}
	// clear the end-of-file state, then restore the strict mask before closing
	input.clear();
	input.exceptions(ios::failbit | ios::badbit);
	input.close();
}

/**
	Read an input stream contains site-indices and double values for site rates
	@param in the input stream
	@param index (OUT) integer vector contains indices for the sites
	@param rates (OUT) double vector contains rates for the indicated sites 
*/
//void readSiteRates (ifstream &in, IntVec &index, DoubleVec &rates)
//{
//}


/**
	remove spaces contained in a string
*/
/*void removeSpaces(string &str)
{
	while(str.find(" ") != string::npos)
  	{
    	str.replace(str.find(" "), 1, "");
  	}	
}*/

/**
	print the program name and the copyright notice to standard output
*/
void printCopyright()
{
	cout << "\nimosm: IMbedding One-Step Mutations (extra substitutions) into sequence alignments." << endl
	     << "Copyright (C) Minh Anh Thi Nguyen, Tanja Gesell and Arndt von Haeseler." << endl
	     << endl;
}
/**
	print help - usage
	@param vb selects how much is printed: ALL and HIDDEN also print the usage
	          header with the required arguments; HIDDEN additionally lists the
	          TREE-PUZZLE options (-pz, -pzTree)
*/
void printHelp(Verbose vb)
{
	if (vb == ALL || vb == HIDDEN){
		cout << "USAGE: imosm -a <file> -t <file> OPTIONS" << endl;
		cout << "REQUIRED ARGUMENTS:" << endl;
		cout << "    -a <file>  : File contains an alignment in PHYLIP format." << endl;
		cout << "    -t <file>  : File contains a (rooted) tree in NEWICK format." << endl;
		cout << "     OPTIONS: At least, either -n or -usebranch must be given." << endl;
		cout << "     You may prepare and submit these OPTIONS after having a look at the tree drawn!" << endl;
	}
	cout << "GENERAL OPTIONS:" << endl;
	cout << "    -h:             Print this help message." << endl;
	cout << "    -ni:            Do NOT prompt interactive interface. Just start the program with provided arguments." << endl;
	cout << "    -seed <number>: Seed for random number generator." << endl;
	cout << "    -r <number>:    Number of repetitions (i.e. number of output alignments)." << endl;
	cout << "    -prefix:        Prefix for the output alignments." << endl;
	cout << "    -sd:            To print the resulting alignments in standard interleave PHYLIP format" << endl;
	cout << "                    By default, the alignments are printed in sequential PHYLIP format." << endl;
	cout << "    -add:           To print additional information." << endl;
	if ( vb == HIDDEN )
	{
		cout << "    -pz <file>:     File contains parameters to run TREE-PUZZLE to compute site likelihood." << endl;
		cout << "    -pzTree <file>: File contains tree to run TREE-PUZZLE if it is different from the above tree." << endl;
	}
	cout << "TREE OPTIONS:" << endl;
	cout << "    -o <file>:    File contains outgroup taxa to root the tree if the input tree is unrooted. Each taxon is on one line." << endl;
	cout << "                  By default, the tree is rooted at the first leaf as appeared in the tree file." << endl;
	cout << "EMBEDDING OPTIONS: Either -n or -usebranch must be given!" << endl;
	cout << "    -n <number>:  Number of extra substitutions to be imbedded into the alignment." << endl;
	cout << "                  Note that EXACT <number> extra substitutions will happen!" << endl;
	cout << "                  This option can be used together with -b, -m, -p, -bm, -bp, -mp,-bmp, -bprob -mrates -prates." << endl;
	cout << "    -usebranch:   Use branch length in the input tree as the EXPECTED number of extra substitutions per site to happen on the branch." << endl;
	cout << "                  This option can be used together with -m, -p, -mrates, -prates." << endl;
	cout << endl;
	cout << "    -b <file>:    File contains a list of branches (brID) on which extra substitutions should be put." << endl;
	cout << "                  You are recommended to root the tree by providing -o <outgroupFile>, " << endl;
	cout << "                  look at the tree shown and then choose the branches (brIDs)." << endl;
	cout << "    -m <file>:    File contains a list of integers (1, 2 or 3, each on one line)" << endl;
	cout << "                  indicating the types of the substitutions that should happen." << endl;
	cout << "    -p <file>:    File contains a list of non negative integers (STARTED BY 0), each on one line, " << endl;
	cout << "                  indicating alignment site positions should be changed by the extra-substitutions." << endl;
	cout << "    -bm <file>:   File contains pairs of branch and substitution type for embedding." << endl;
	cout << "                  Each pair on one line, use space-bar or tab to seperate the two numbers." << endl;
	cout << "                  This file should contain exactly -n lines, otherwise the pair (branch, subs.type) may not be paired!" << endl;
	cout << "                  If given, -b and -m will be ignored." << endl;
	cout << "    -bp <file>:   File contains pairs of branch and alignment site position for embedding." << endl;
	cout << "                  Each pair on one line, use space-bar or tab to seperate the two numbers." << endl;
	cout << "                  This file should contain exactly -n lines, otherwise the pair (branch, position) may not be paired!" << endl;
	cout << "                  If given, -b and -p will be ignored." << endl;
	cout << "    -mp <file>:   File contains pairs of mutation type and alignment site position for embedding." << endl;
	cout << "                  Each pair on one line, use space-bar or tab to seperate the two numbers." << endl;
	cout << "                  This file should contain exactly -n lines, otherwise the pair (subs.type, position) may not be paired!" << endl;
	cout << "                  If given, -m and -p will be ignored." << endl;
	cout << "    -bmp <file>:  File contains series of branch, mutation type and alignment site position for embedding." << endl;
	cout << "                  Each series on one line, use space-bar or tab to seperate the numbers." << endl;
	cout << "                  This file should contain exactly -n lines, otherwise the series (branch, subs.type, position) may not be paired together!" << endl;
	cout << "                  If given, all in {-b, -m, -p, -bm, -bp, -mp} will be ignored." << endl;
	cout << "    -bprob        If none in {-b, -bm, -bp, -bmp, -usebranch} is given then this option indicates that the extra " << endl;
	cout << "                  substitutions will happen on the branches proportionally to their branch length." << endl;
	cout << "                  This also means the input tree must have branch length. A branch is excluded from " << endl;
	cout << "                  the extra substitutions by setting its branch length to 0." << endl;
	cout << "                  If none in {-b, -bm, -bp, -bmp, -bprob, -usebranch} is given then branches are selected uniformly." << endl;
	cout << "    -mrates <num1 num2 num3>: If none in {-m, -bm, -mp, -bmp} is given then this option indicates that the " << endl;
	cout << "                  classes of the extra substitutions will be selected proportionally to the given rates (probabilities): " << endl;
	// the third rate is num3 (the text used to name num2 twice)
	cout << "                  num1 for the transition (A-G,C-T), num2 for the transverstion (A-C,G-T) and num3 for the transversion (A-T,G-C)." << endl;
	cout << "                  num1+num2+num3 need NOT to be 1. The program itself will normalize them." << endl;
	// the option documented above is -mrates; no -mprob option is listed in this help
	cout << "                  If none in {-m, -bm, -mp, -bmp, -mrates} is given then substitution types are selected uniformly." << endl;
	cout << "    -prates <file>: File contains rates for sites in the alignment. Each line contains the site position" << endl;
	cout << "                   (STARTED BY 1) and the corresponding rate. This format follows the output from seq-gen. " << endl;
	cout << "                  If none in {-p, -bp, -mp, -bmp} is given, then alignment sites are selected proportionally to these rates."<< endl;
	cout << "                  If none in {-p, -bp, -mp, -bmp, -prates} is given, then alignment sites are selected uniformly." << endl;
}

/**
	Fill the vectors in params from the input files named in params.
	A file is read only if its name is not the empty string. The files are
	processed in the order b, m, p, bm, bp, mp, bmp, so a file read later
	overwrites vectors filled by an earlier one. The site-rate file is read last.
	@param params (IN/OUT) file names are read; branches, mutations, positions,
	       tbsPositions and pRates are written
*/
void deriveInput(Params &params)
{
	// --- files with a single column ---
	if ( strcmp(params.branchFile,"") != 0 )
	{
		IntMatrix columns;
		readIntMatrix(params.branchFile, 1, columns);
		params.branches = columns[0];
	}
	if ( strcmp(params.mutationFile,"") != 0 )
	{
		IntMatrix columns;
		readIntMatrix(params.mutationFile, 1, columns);
		params.mutations = columns[0];
	}
	if ( strcmp(params.positionFile,"") != 0 )
	{
		IntMatrix columns;
		readIntMatrix(params.positionFile, 1, columns);
		params.positions = columns[0];
	}

	// --- files with two columns ---
	if ( strcmp(params.bmFile,"") != 0 )
	{
		// branch, mutation type
		IntMatrix columns;
		readIntMatrix(params.bmFile, 2, columns);
		params.branches = columns[0];
		params.mutations = columns[1];
	}
	if ( strcmp(params.bpFile,"") != 0 )
	{
		// branch, site position
		IntMatrix columns;
		readIntMatrix(params.bpFile, 2, columns);
		params.branches = columns[0];
		params.positions = columns[1];
	}
	if ( strcmp(params.mpFile,"") != 0 )
	{
		// mutation type, site position
		IntMatrix columns;
		readIntMatrix(params.mpFile, 2, columns);
		params.mutations = columns[0];
		params.positions = columns[1];
	}

	// --- file with three columns: branch, mutation type, site position ---
	if ( strcmp(params.bmpFile,"") != 0 )
	{
		IntMatrix columns;
		readIntMatrix(params.bmpFile, 3, columns);
		params.branches = columns[0];
		params.mutations = columns[1];
		params.positions = columns[2];
	}

	// --- site rates ---
	if ( strcmp(params.prateFile, "") != 0 )
	{
		readSiteRates(params.prateFile, params.tbsPositions, params.pRates);
	}
}

void getInputLine(const char *message, int &argc, StringVec &argv)
{
	cout << "\n" << message;
	string line;
	getline(cin,line);
	istringstream line_in(line);
	argc = 0;
	argv.resize(0,"");	
	string tempString;
	while ( line_in >> tempString )
	{		
		argv.push_back(tempString);
		argc++;
	}
}

/**
	Copy an array of C strings into a vector of strings.
	@param argc number of entries of argv to copy
	@param argv the C strings
	@return the first argc entries of argv as strings, in order
*/
StringVec copyToStringVec (const int argc, char* argv[])
{
	StringVec copied;
	int position = 0;
	while ( position < argc )
	{
		copied.push_back(string(argv[position]));
		++position;
	}
	return copied;
}

/**
	Apply a mutation of the given type to a nucleotide.
	The nucleotide is mapped case-insensitively to a row index (A=0, C=1, G=2,
	T or U=3); the result is looked up through the MutatedInt and Nucleotides tables.
	Any other character, or a mutType outside [0, MutType), is reported through outError.
	@param nuc the nucleotide to mutate
	@param mutType the type of the mutation
	@return the nucleotide after the mutation
*/
char mutated(const char nuc, const int mutType)
{
	const int upper = toupper(nuc);
	int nucIndex;
	if ( upper == 'A' )
		nucIndex = 0;
	else if ( upper == 'C' )
		nucIndex = 1;
	else if ( upper == 'G' )
		nucIndex = 2;
	else if ( upper == 'T' || upper == 'U' )
		nucIndex = 3;
	else
		nucIndex = -1;

	if ( nucIndex == -1 ) {
		outError ("There is non-{ACGTU} in one of the sequences!" );
	}
	if ( mutType < 0 || mutType >= MutType ) {
		outError ("There is an invalid mutation type!");
	}
	return Nucleotides[MutatedInt[nucIndex][mutType]];
}

void printUsersSpec(const Params params)
{
	cout << "\nYour specification: " << endl;
	cout << "    An alignment is provided in: " << params.alignFile << endl;
	cout << "    A tree is provided in: " << params.treeFile << endl;
	if ( strcmp(params.outgroupFile,"") != 0 )
		cout << "    Outgroup taxa are provided in: " << params.outgroupFile << endl;
	if ( !params.outgroup.empty() )
	{
		cout << "        The provided outgroup taxa are: " << endl;
		for ( StringVec::const_iterator it = params.outgroup.begin(); it != params.outgroup.end(); it++ )
			cout << "          " << (*it) << endl;
	}
	if ( !params.usebranch )
	{	
		if ( params.nExtra >=0 )
			cout << "    Number of extra mutations: " << params.nExtra << ". This will be the EXACT number of extra mutations that happen!" << endl;
		else
			cout << "    None of {-n, -usebranch} is given. The alignment remains unchanged." << endl;
	}
	else
		cout << "    Use branch length as expected number of extra substitutions per site that happen along the branch." << endl;	
	cout << endl;
	if (params.nExtra > 0 )
	{		
		if ( !params.positions.empty() )
			cout << "    A list of site positions being changed is provided." << endl;
		else
			if ( !params.pRates.empty() ){
				cout << "    A list of site (position) rates is provided." << endl;
				if ( params.pr == 1 )
					cout << "     Positions will be selected with a probability proportional to their rates." << endl;
				else if ( params.pr == -1 )
					cout << "     Positions will be selected with a probability reversely proportional to their rates." << endl;
			}
			else 
				if ( params.pr != 0 ) 
					cout << "    WARNING: A list of site index and site rates should be provided because pr ! = 0." << endl;
				else
					cout << "    Positions of to-be-mutated sites are selected randomly (uniform distribution)." << endl;
		if ( !params.mutations.empty() )
			cout << "    A list of the types of the mutations is provided." << endl;
		else
			if ( !params.mutRates.empty() ){
				cout << "    A list of rates for mutation types is provided." << endl;
				if ( params.mr == 1 )
					cout << "    Mutation types will be selected with a probability proportional to their rates." << endl;
				else if ( params.mr == -1 )
					cout << "    Mutation types will be selected with a probability inversely proprotional to their rates." << endl;
			}
			else
				if ( params.mr != 0 )
					cout << "     WARNING: A list of rates for mutation types should be provided because mr != 0." << endl;
				else 
					cout << "    Types of the mutations are selected randomly (uniform distribution)." << endl;
		if ( !params.branches.empty() )
			cout << "    A list of branches on the tree is provided. " << endl;
		else
		{
			if ( params.br == 1 )
				cout << "    Branches on the tree are depicted randomly with a probability proportional to their relative lengths." << endl;
			else if ( params.br == -1 )
				cout << "    Branches on the tree are depicted randomly with a probability reversely proportional to their relative lengths." << endl;
			else
				cout << "    Branches on the tree are depicted randomly (uniform distribution)." << endl; 
		}
		cout << "    Number of repetitions: " << params.nRepeat << endl;
	}
}

int binarySearch(const DoubleVec vec, const double key, int &begin, int &end)
{
	int probe = (end - begin)/2 + begin;
	int size = vec.size();
// 	cout << "in binarySearch, key: " << key << ", begin: " << begin << ", end: " << end << ", probe: " << probe << endl;
  	if ( probe < 0 || probe > size ) return -1;
// 	if ( probe <= 0 ) return -1;
	
	if ( vec[probe] < key )
	{
		int next = probe + 1;
		if ( next >= size ) return -1;	
		if ( key <= vec[next] ) 
			return next;
		else
		{
			begin = next;
			return binarySearch(vec, key, begin, end);
		}		
	}
	else if ( vec[probe] > key )
	{		
		int prev = probe - 1;
		if ( prev < 0) return -1;
		if ( key > vec[prev] ) 
			return probe;
		else if (key < vec[prev]){
			end = prev;
			return binarySearch(vec, key, begin, end);
		}else
			return prev;
	}
	else return probe;

/*	int probe = (end - begin)/2;
	if ( probe <= 0 || probe >= end ) return -1;
	
	if ( vec[probe] < key )
	{
		int next = probe + 1;
		if ( key < vec[next] ) 
			return probe;
		else if ( key > vec[next] )
		{
			begin = next;
			return binarySearch(vec, key, begin, end);
		}
		else return next;
	}
	else if ( vec[probe] > key )
	{
		int prev = probe - 1;
		if ( key >= vec[prev] ) 
			return prev;
		end = prev;
		return binarySearch(vec, key, begin, end);
	}
	else return probe;*/
}

/**
	Look up the value whose key is the first key of the map that is not
	smaller than the given key, using binarySearch on the keys.
	@param amap map from double keys to integers
	@param key the value to locate among the keys
	@return the integer stored under the key that was found, or -1 if the map
	        is empty or binarySearch finds no position for key
*/
int searchInMap(const DoubleIntMap amap, const double key)
{
	// the keys, in the iteration order of the map
	DoubleVec vec;
	for ( DoubleIntMap::const_iterator it = amap.begin(); it != amap.end(); it++ )
		vec.push_back(it->first);
	// an empty map has nothing to find; do not hand an empty vector to binarySearch
	if ( vec.empty() )
		return -1;

	int begin = 0;
	int end = vec.size();
	const int foundIndex = binarySearch(vec, key, begin, end);
	if ( foundIndex == -1 )
		return -1;
	return amap.find(vec[foundIndex])->second;
}

int selectIndex(const DoubleIntMap doubleInt, const int num, IntVec &ret)
{
	int needed = num - ret.size();
	if ( needed <= 0 ){
		cout << "Already have enough generated numbers! " << endl;
		return -1;
	}
	int count = 0;
	//cout << "Random \t Index" << endl;
	while ( count < needed ){
		double temp = random01();
// 		cout << temp << "\t";
		int index = searchInMap(doubleInt, temp);
		if ( index >= 0 ){
			count++;
			ret.push_back(index);
// 		cout << index;
		}
// 		cout << endl;
	}
	return 0;	
}

int selectNumber(const int lower, const int upper, const int num, IntVec &ret, bool overlap)
{
	int needed = num - ret.size();
	if ( needed <= 0 )
	{
		cout << "Already have enough generated numbers! " << endl;
		return -1;
	}
	if ( overlap == true)
	{
		for (int i = 0; i < needed; i++)
			ret.push_back(randomInt(lower,upper));
	}
	else
	{
		IntSet genSet;
		int count = genSet.size();
		while ( count < needed )
		{
			genSet.insert(randomInt(lower,upper));
			count = genSet.size();
		}
		for (IntSet::const_iterator it = genSet.begin(); it != genSet.end(); it++)
			ret.push_back((*it));
	}
	return 0;
}

int mutCumRate (const IntDoubleMap rates, DoubleIntMap & ret, int seType)
{	
	
	if ( seType == NONE ) return -1;
	ret.insert(DoubleIntMap::value_type(0.0,-1));
	double sumRate = 0;
	for ( IntDoubleMap::const_iterator it = rates.begin(); it != rates.end(); it++ )
		sumRate += it->second;
	if ( sumRate == 0 ) return -1;
	if ( seType == PRO )
	{
		double cumSum = 0;
		for ( IntDoubleMap::const_iterator it = rates.begin(); it != rates.end(); it++ )
		{
			cumSum += it->second;
			ret.insert(DoubleIntMap::value_type( cumSum/sumRate, it->first ) );
		}
		return 0;
	}
	if ( seType == INV_PRO ) // if a rate == 0, the corresponding mutation will be ignore!
	{
		double sumInvRate = 0;
		for ( IntDoubleMap::const_iterator it = rates.begin(); it != rates.end(); it++ )
		{
			if ( it->second != 0)
				sumInvRate += sumRate/it->second;			
		}
		double cumSum = 0;
		for ( IntDoubleMap::const_iterator it = rates.begin(); it != rates.end(); it++ )
		{
			if ( it->second != 0 )
			{
				cumSum += sumRate/it->second;
				ret.insert( DoubleIntMap::value_type( cumSum/sumInvRate, it->first ));
			}
		}
		return 0;	
	}
	return -1;	
}

/**
	Build a map of normalised cumulative rates from a multimap rate -> index.
	Same computation as mutCumRate, but here the rate is the key (it->first)
	and the index is the mapped value (it->second) of every entry.
	A sentinel entry (0.0, -1) is inserted first. For INV_PRO the weight of an
	index is total/rate, and entries whose rate is 0 are left out.
	@param rates multimap from rate to index
	@param ret (OUT) receives the sentinel and the cumulative entries
	@param seType selection type: NONE, PRO or INV_PRO
	@return 0 on success; -1 if seType is NONE or unknown, or if the rates sum up to 0
	        (in the last two cases the sentinel has already been inserted)
*/
int mutCumRate2 (const DoubleIntMM rates, DoubleIntMap & ret, int seType)
{

	if ( seType == NONE ) return -1;
	ret.insert(DoubleIntMap::value_type(0.0,-1));
	double sumRate = 0;
	for ( DoubleIntMM::const_iterator it = rates.begin(); it != rates.end(); it++ )
		sumRate += it->first;
	if ( sumRate == 0 ) return -1;
	if ( seType == PRO )
	{
		double cumSum = 0;
		for ( DoubleIntMM::const_iterator it = rates.begin(); it != rates.end(); it++ )
		{
			cumSum += it->first;
			ret.insert(DoubleIntMap::value_type( cumSum/sumRate, it->second ) );
		}
		return 0;
	}
	if ( seType == INV_PRO ) // if a rate == 0, the corresponding mutation will be ignored!
	{
		double sumInvRate = 0;
		for ( DoubleIntMM::const_iterator it = rates.begin(); it != rates.end(); it++ )
		{
			if ( it->first != 0)
				sumInvRate += sumRate/it->first;
		}
		double cumSum = 0;
		for ( DoubleIntMM::const_iterator it = rates.begin(); it != rates.end(); it++ )
		{
			// The test must look at the rate (first), exactly like the pass above.
			// Testing the index (second) divided by zero for zero rates and
			// dropped the entry with index 0.
			if ( it->first != 0 )
			{
				cumSum += sumRate/it->first;
				ret.insert( DoubleIntMap::value_type( cumSum/sumInvRate, it->second ));
			}
		}
		return 0;
	}
	return -1;
}

int selectIndexAlias(const DoubleVec cutoffVec, const IntVec aliasVec, const int num, IntVec &ret)
{
	IntVec genIndex;
	if ( cutoffVec.size() != aliasVec.size() ) return -1;
	if ( cutoffVec.empty() ) return -1;
	int totalSize = cutoffVec.size();
	//cout << "totalSize: " << totalSize << endl;
	for ( int i = 0; i < num; i++ )
	{
		//generate a U01 random number
		double u = random01();
		int k = u*totalSize;
		double r = (double)u*totalSize - (double)k;
		//cout << "u: " << u << " k: " << k << ", r: " << r << endl;
		if ( r <= cutoffVec[k] )
			genIndex.push_back(k);
		else
			genIndex.push_back(aliasVec[k]);		
	}
	ret = genIndex;
	return 0;
}

int generateCutoffAlias (const DoubleVec uprobs, DoubleVec &reCutoffVec, IntVec &reAliasVec)
{
	int totalNum = uprobs.size();
	if (totalNum <= 0 ) return -1;
	//make sure that the probabilies sum up to 1
	double cumSum = 0;
	for ( int i = 0; i < totalNum; i++ )
		cumSum += uprobs[i];
	//create a new vector for the probabilities that sum up to 1
	//cout << "cumSum: " << cumSum << endl;
	DoubleVec probs;
	for ( int i = 0; i < totalNum; i++ )
		probs.push_back(uprobs[i]/cumSum);
	IntVec smallVec; //vector contains the index, j where cutoffVec[j] < 1
	IntVec largeVec; //vecotr contains the index, k where cutoffVec[k] >= 1
	DoubleVec cutoffVec;
	IntVec aliasVec;
	for ( int index = 0; index < totalNum; index++ )
	{
		double curF = probs[index]*totalNum;
		cutoffVec.push_back(curF);
		aliasVec.push_back(-1);
		if ( curF < 1.0 ) 
			smallVec.push_back(index);
		else	
			largeVec.push_back(index);
	}
	bool stop = false;
	do
	{	
		//cout << "size of small vec: " << smallVec.size() << endl;
		//take an index such that cutoffVec[index] < 1, erase this index from the vector containing it.
		int smallID = (*smallVec.begin());		
		smallVec.erase(smallVec.begin());
		//take an index such that cutoffVec[index] >= 1, erasing this index from the largeVec (and then pushing into the smallVec) or not depends on what comes out afterward. We need to check later (*).
		int largeID = (*largeVec.begin());
		//updating the aliasVec at position smallID
		aliasVec[smallID] = largeID;
		//updating the cutoffVec at position largeID
		cutoffVec[largeID] = cutoffVec[largeID] - (1 - cutoffVec[smallID]);

		//cheking (*)
		if ( cutoffVec[largeID] < 1.0 )
		{
			largeVec.erase(largeVec.begin());
			smallVec.push_back(largeID);
		}	
		if ( smallVec.size() == 0 || largeVec.size() == 0)
			stop = true; 
	} while (!stop);
	//cout << "size of large vector: " << largeVec.size() << endl;
	//cout << "size of small vector: " << smallVec.size() << endl;
	//cout << "index by smallVec: " << (*smallVec.begin()) << " cutoff by this index: " << cutoffVec[(*smallVec.begin())] << ", alias by this index: " << aliasVec[smallVec[0]] << endl;
	//cout << "Final check\n";
	if ( smallVec.size() > 1 || largeVec.size() > 1 )
		outError("internal error in generateCutOffAlias, smallVec.size() > 1 || largeVec.size() > 1");
	else
		if ( smallVec.size() == 1 )
			aliasVec[smallVec[0]] = smallVec[0];
		else aliasVec[largeVec[0]] = largeVec[0];
	//checking final results,
	for ( int j = 0; j < totalNum; j++ )
		if ( aliasVec[j] == -1 ) return -1;
	//returning values
	reCutoffVec = cutoffVec;
	reAliasVec = aliasVec;
	return 0;
}

int selectFromIntVec(const int num, const IntVec inputVec, IntVec &ret)
{
	int upperIndex = inputVec.size() - 1;
	if ( num <= 0 || upperIndex <=0 )
	{
		cout << "Warning: Selecting " << num << " integers from a vector containing " << upperIndex + 1 << " numbers!" << endl;
		return -1;
	}
	IntVec _ret;
	IntVec indexVec = randomIntVec (0, upperIndex, num);
	for ( int i=0; i<num; i++)
		_ret.push_back(inputVec[indexVec[i]]);
	ret = _ret;
	return 0;	
}

/**
	Count the events that fall into a time span when waiting times are drawn
	as -log(1-r)/rate with r taken from random01().
	@param rate rate of the events
	@param time length of the time span
	@return number of events whose waiting time fitted into the remaining time;
	        0 if rate is negative or time is not positive
*/
int countEvents (const double rate, const double time)
{
	// A negative rate would produce negative waiting times: the remaining
	// time would grow with every draw and the loop below would never end.
	if ( rate < 0 )
		return 0;

	double remainTime = time;
	int numEvents = 0;
	while ( remainTime > 0 ){
		// draw a random number r
		const double r = random01();
		// derive the waiting time t until the next event
		const double t = -(log(1-r)/rate);
		if ( t < remainTime ) numEvents++;
		remainTime -= t;
	}
	return numEvents;
}
/**======================== FROM IQ-TREE package ======================================
	to incorporate class Alignment (without changing anything except deleting
=======================================================================================*/
/**
        Print a warning message, prefixed with "WARNING: ", to standard error.
        The program continues afterwards.
        @param warn warning message
*/
void outWarning(const char *warn)
{
        cerr << "WARNING: ";
        cerr << warn << endl;
}

/**
        Print a warning message given as a string; forwards to the C-string overload.
        @param warn warning message
*/
void outWarning(string warn)
{
        const char *text = warn.c_str();
        outWarning(text);
}
/**
	Guess the format of an input file from its first character that is read
	with a value above 32 (characters up to 32 are skipped, at most 21 reads are made).
	@param input_file name of the file to inspect
	@return IN_NEXUS if that character is '#', IN_NEWICK if it is '(' or '[',
	        IN_OTHER otherwise
	A stream failure (for instance a file that cannot be opened) is reported through outError.
*/
InputType detectInputFile(char *input_file) {

	try {
		ifstream input;
		input.exceptions(ios::failbit | ios::badbit);
		input.open(input_file);

		unsigned char first;
		int attempts = 0;
		for (;;) {
			input >> first;
			if (first > 32) break;
			if (input.eof()) break;
			if (attempts++ >= 20) break;
		}
		input.close();

		if (first == '#')
			return IN_NEXUS;
		if (first == '(' || first == '[')
			return IN_NEWICK;
		return IN_OTHER;
	} catch (const ios::failure &) {
		outError("Cannot read file ", input_file);
	}
	return IN_OTHER;
}

/**---------------------------------------------------------------------------------------------*/
/**
	Read the log-likelihoods of the sites from a file.
	Opens the file, delegates the parsing to the stream overload of readSitelh
	and reports any stream failure through outError.
	@param inFile name of the file to read
	@param logllVec (OUT) the log-likelihoods read from the file
*/
void readSitelh (const char* inFile, DoubleVec &logllVec)
{
	cout << "\nReading file containing site's loglikelihood: " << inFile << "...." << endl;
	const ios::iostate strictMask = ios::failbit | ios::badbit;
	ifstream input;
	try{
		// with this mask a failing open() raises ios::failure
		input.exceptions(strictMask);
		input.open(inFile);
		readSitelh(input, logllVec);
		// reading stops on a failed extraction; reset the state before closing
		input.clear();
		input.exceptions(strictMask);
		input.close();
	} catch (const ios::failure &){
		outError(ERR_READ_INPUT, inFile);
	}
	cout << "Finish reading site's loglikelihood!" << endl;
}

void readSitelh (ifstream &inFile, DoubleVec &logllVec)
{
	DoubleVec _logllVec;
	int siteNum;
	string currentString;
	double currentLogLL;	
	try{
		//ignore the first number
		inFile >> currentString;
		//read number of sites
		inFile >> currentString;
		siteNum = convert_int(currentString.c_str());
		//ignore the name of the tree		
		inFile >> currentString;		
		while (!inFile.eof())
		{
			//reading each line of the file
			//remove the badbit
			inFile.exceptions (ios::badbit);
			if ( !(inFile >> currentString) ) break;
			//set the failbit again
			inFile.exceptions (ios::failbit | ios::badbit);
			_logllVec.push_back(convert_double(currentString.c_str()));
		}
	} catch(bad_alloc){
			outError(ERR_NO_MEMORY);
	} catch (const char *str){
			outError(str);
	} catch (char *str){
			outError(str);
	} catch (string str){
			outError(str);
	} catch (ios::failure){
			outError(ERR_READ_INPUT);
	} catch (...){
			outError(ERR_READ_ANY);
	}
	if (siteNum != _logllVec.size())
		outError("Actual number of site's likelihoods is not consistent with the announced number in the first line.");
	logllVec = _logllVec;
}

/**
	Run the external program "puzzle" on an alignment and a tree, read the site
	log-likelihoods it writes to <prefix>.sitelh, then delete the other files
	it produced (<prefix>.dist, .tree, .puzzle and .puzzle.log).
	@param alignFile name of the alignment file passed to puzzle
	@param pztree name of the tree file passed to puzzle
	@param puzzleFile file that is fed to puzzle on standard input
	@param prefix prefix of the files written by puzzle
	@param logll (OUT) the site log-likelihoods read from <prefix>.sitelh
	A file that cannot be deleted is reported through outError.
*/
void puzzleLogll (const char alignFile[], const char pztree[], const char puzzleFile[], const char prefix[], DoubleVec &logll)
{
	// File names are built as strings, so a long prefix cannot overflow a fixed-size buffer.
	const string base(prefix);

	//prepare command line to run tree-puzzle
	string tpCommand("puzzle -usebranch -wsl ");
	tpCommand += alignFile;
	tpCommand += " ";
	tpCommand += pztree;
	tpCommand += " -prefix=";
	tpCommand += prefix;
	tpCommand += " < ";
	tpCommand += puzzleFile;
	// NOTE(review): with "2>&1 > file" only stdout goes to the log file, stderr
	// ends up on the caller's stdout - confirm whether "> file 2>&1" was intended.
	tpCommand += " 2>&1 > ";
	tpCommand += prefix;
	tpCommand += ".puzzle.log";
	cout << "Running puzzle with alignment file: " << alignFile << endl;
	cout << "Command line: " << tpCommand << endl;
	// the exit status is not inspected; a failed run shows up when the result file is read
	system(tpCommand.c_str());

	//read the file containing site's likelihood
	const string sitelhFile = base + ".sitelh";
	readSitelh (sitelhFile.c_str(), logll);

	//remove files produced by puzzle
	static const char *const leftovers[] = { ".dist", ".tree", ".puzzle", ".puzzle.log" };
	const int numLeftovers = sizeof(leftovers)/sizeof(leftovers[0]);
	for ( int i = 0; i < numLeftovers; i++ )
	{
		const string tempFile = base + leftovers[i];
		if ( remove(tempFile.c_str()) != 0 )
			outError("in deleting file: ", tempFile.c_str());
	}
}

/**
	Compute the chi-square statistic: the sum over all entries of
	(observed - expected)^2 / expected.
	@param observed observed frequencies
	@param expected expected frequencies; must have as many entries as observed (asserted)
	@return the value of the statistic
*/
double computeChisquare (const DoubleVec observed, const DoubleVec expected)
{
	const int count = observed.size();
	assert (count == expected.size());

	double statistic = 0;
	int k = 0;
	while ( k < count )
	{
		const double deviation = observed[k] - expected[k];
		statistic += pow(deviation, 2)/expected[k];
		++k;
	}
	return statistic;
}
