/**
	Copied from the IQ-TREE package, with some methods removed and some added.
*/
//
// C++ Interface: alignment
//
// Description:
//
//
// Author: BUI Quang Minh, Steffen Klaere, Arndt von Haeseler <minh.bui@univie.ac.at>, (C) 2008
//
// Copyright: See COPYING file that comes with this distribution
//
//

#ifndef ALIGNMENT_H
#define ALIGNMENT_H

#include "utility.h"
#include "pattern.h"
#include "split.h"

/*
	Special state codes (going by their names: "unknown" and "invalid" state)
	and NUM_CHAR, presumably the number of distinct char values -- confirm
	against the code that uses them.

	NOTE(review): 128 does not fit in a char where char is signed, so
	STATE_INVALID is then stored as a negative value. The explicit cast below
	performs exactly the conversion the plain initializer did, but makes it
	visible; always compare against STATE_INVALID itself, never against the
	literal 128.
*/
const char STATE_UNKNOWN = 127;
const char STATE_INVALID = static_cast<char>(128);
const int NUM_CHAR = 256;


#ifdef USE_HASH_MAP
/*
	Hash function for string keys, so that a string can be used as the key of
	a hash_map. It is placed in the namespace that holds the hash template:
	stdext when WIN32 is defined, __gnu_cxx otherwise.
*/
#if defined(WIN32) 
namespace stdext {
#else
namespace __gnu_cxx {
#endif

	template<>
	struct hash<string> {
		/**
			@param str string to hash; taken by const reference so that
				computing a hash does not copy the key
			@return the value hash<const char*> yields for str.c_str()
		*/
		size_t operator()(const string &str) const {
			hash<const char*> hash_str;
			return hash_str(str.c_str());
		}
	};
} // namespace stdext (WIN32) or __gnu_cxx (otherwise)
#endif


/*
	Associative container from a string key to an int value, used by
	Alignment to map a pattern to its index in the vector of patterns.
	It is an ordered map unless USE_HASH_MAP is defined, in which case a
	hash_map is used instead.
*/
#ifndef USE_HASH_MAP
typedef map<string, int> PatternIntMap;
#else
typedef hash_map<string, int> PatternIntMap;
#endif
/**
	Multiple sequence alignment, stored as a vector of site patterns.

	@author BUI Quang Minh, Steffen Klaere, Arndt von Haeseler <minh.bui@univie.ac.at>
*/
class Alignment : public vector<Pattern>
{
public:
	friend class Split;

	/**
		Default constructor.
	*/
	Alignment();

	/**
		Constructor taking an alignment file.
		@param filename file name
		@param intype (OUT) input format of the file
	*/
	Alignment(char *filename, InputType &intype);

	/**
		Destructor.
	*/
	virtual ~Alignment();


/****************************************************************************
	input alignment reader
****************************************************************************/

	/**
		Add a pattern to the alignment.
		@param pat the pattern
		@param site index of the alignment site the pattern comes from
		@param freq frequency of the pattern
		@return TRUE if the pattern contains only gaps or unknown characters;
				such a pattern is not added.
	*/
	bool addPattern(Pattern &pat, int site, int freq = 1);


	/**
		Read the alignment in NEXUS format (disabled in this copy).
		@param filename file name
		@return 1 on success, 0 on failure
	*/
// 	int readNexus(char *filename);


	/**
		Read the alignment in PHYLIP format.
		@param filename file name
		@return 1 on success, 0 on failure
	*/
	int readPhylip(char *filename);

	/**
		Extract the alignment from a NEXUS data block, called by readNexus()
		(disabled in this copy).
		@param data_block data block of the NEXUS file
	*/
//     void extractDataBlock(NxsCharactersBlock *data_block);


/****************************************************************************
	get general information from alignment
****************************************************************************/

	/**
		@return number of sequences (size of the sequence-name vector)
	*/
	int getNSeq() const {
		return static_cast<int>(seq_names.size());
	}


	/**
		@return number of sites, i.e. alignment columns (size of the
			site-to-pattern vector)
	*/
	int getNSite() const {
		return static_cast<int>(site_pattern.size());
	}


	/**
		@return number of patterns (size of this vector of patterns)
	*/
	int getNPattern() const {
		return static_cast<int>(this->size());
	}

	/**
		@param i sequence index
		@return name of that sequence
	*/
	string &getSeqName(int i);

	/**
		@return vector of pattern frequencies, in the same order as the
			patterns in this vector. Intended for use with printReducedAlign.
		NOTE(review): how getFreqVec_ordPat differs from getFreqVec is not
		visible in this header -- confirm against the implementation.
	*/
	vector< int > getFreqVec() const;
	vector< int > getFreqVec_ordPat() const;

	/**
		@param seq_name sequence name
		@return ID of that sequence, -1 if not found
	*/
	int getSeqID(string &seq_name);

/****************************************************************************
	ADDED by MA: Begin
****************************************************************************/
	/**
		Copy constructor (disabled).
	*/
//	Alignment (const Alignment &anAlign);
	/**
		Copy assignment (disabled).
	*/
//	Alignment &operator = (const Alignment &other);

	/**
		@return a copy of the vector of sequence names
	*/
	vector< string > getSeqNames() const {
		return seq_names;
	}

	/**
		@return a copy of the site-to-pattern index vector
	*/
	vector<int> getSite_Pattern() const {
		return site_pattern;
	}

	/**
		@return a copy of the map from pattern to pattern index
	*/
	PatternIntMap getPattern_Index() const {
		return pattern_index;
	}

	/**
		NOTE(review): presumably returns the sequences of the alignment as
		strings -- confirm against the implementation.
	*/
	vector< string > getSequences() const;

	/**
		Print the alignment in strict PHYLIP format or sequential PHYLIP
		format, either into a file or onto a stream.
		@param fileName name of the output file
		@param out output stream
		@param interleave indicate strict PHYLIP format
	*/
	void printAlign (char *fileName, bool interleave = false);
	void printAlign (ostream &out, bool interleave = false);

	/**
		Print the sub-alignment that contains every pattern exactly once,
		in the same order as in this vector of patterns, either into a file
		or onto a stream.
		@param fileName name of the output file
		@param out output stream
		@param interleave indicate strict PHYLIP format
	*/
	void printReducedAlign (char *fileName, bool interleave = false);
	void printReducedAlign (ostream &out, bool interleave = false);

	//Alignment* mutateTo( const MapBrSplit mapBrSplit, const vector< int > branches, const vector< int > mutations, const vector< int > positions );
	/**
		Mutate this alignment by some extra mutations on a given tree (splits).
		@param mapBrSplit a map between branchID and the corresponding split
		@param branches branchIDs of the branches on which extra mutations are put
		@param mutations types of the mutations
		@param positions site positions in the alignment that are mutated
		@return the resulting alignment
	*/
// 	Alignment mutateTo( const MapBrSplit mapBrSplit, const vector< int > branches, const vector< int > mutations, const vector< int > positions );

	/*
		NOTE(review): takes the same parameter list as the disabled mutateTo
		above; how the two differ is not visible in this header.
	*/
	Alignment mutateTo2( const MapBrSplit mapBrSplit, const vector< int > branches, const vector< int > mutations, const vector< int > positions );

/****************************************************************************
	ADDED by MA: End
****************************************************************************/


/****************************************************************************
	alignment general processing
****************************************************************************/

	/**
		Extract the sub-alignment of a subset of sequences (disabled in this copy).
		@param seq_id IDs of the sequences to extract
	*/
// 	void extractSubAlignment(IntVector &seq_id, Alignment *sub_aln);

/****************************************************************************
	Distance functions (all disabled in this copy)
****************************************************************************/


	/**
		Compute the observed distance (number of differing pairs of positions
		per site) between two sequences.
		@param seq1 index of sequence 1
		@param seq2 index of sequence 2
		@return the observed distance between seq1 and seq2 (between 0.0 and 1.0)
	*/
// 	double computeObsDist(int seq1, int seq2);

	/**
		@param seq1 index of sequence 1
		@param seq2 index of sequence 2
		@return Jukes-Cantor corrected distance between seq1 and seq2
	*/
// 	double computeJCDist(int seq1, int seq2);

	/**
		Overridable function computing the distance between two sequences.
		By default it returns the Jukes-Cantor corrected distance.
		@param seq1 index of sequence 1
		@param seq2 index of sequence 2
		@return any distance between seq1 and seq2
	*/
// 	virtual double computeDist(int seq1, int seq2) { return computeJCDist(seq1, seq2); }

	/**
		Compute the distance matrix; dist_mat is assumed to be allocated with
		num_seqs * num_seqs entries.
		@param dist_mat (OUT) distance matrix between all pairs of sequences
	*/
// 	void computeDist(double *dist_mat);

	/**
		Write a distance matrix into a file in PHYLIP distance format.
		@param file_name distance file name
		@param dist_mat distance matrix
	*/
// 	void printDist(const char *file_name, double *dist_mat);

	/**
		Write a distance matrix onto a stream in PHYLIP distance format.
		@param out output stream
		@param dist_mat distance matrix
	*/
// 	void printDist(ostream &out, double *dist_mat);

	/**
		Read a distance matrix from a file in PHYLIP distance format.
		@param file_name distance file name
		@param dist_mat distance matrix
	*/
// 	void readDist(const char *file_name, double *dist_mat);

	/**
		Read a distance matrix from a stream in PHYLIP distance format.
		@param in input stream
		@param dist_mat distance matrix
	*/
// 	void readDist(istream &in, double *dist_mat);
/****************************************************************************
	some statistics
****************************************************************************/

	/**
		Compute empirical state frequencies from the alignment (disabled in
		this copy).
		@param state_freq (OUT) filled with the state frequencies; assumed to
			be allocated with at least num_states entries.
	*/
// 	void computeStateFreq(double *state_freq);

	/**
		Compute empirical rates between state pairs (disabled in this copy).
		@param rates (OUT) vector of size num_states*(num_states-1)/2 for the rates
	*/
// 	void computeEmpiricalRate (double *rates);

	/**
		Count the fraction of constant sites in the alignment and update
		frac_const_sites (disabled in this copy).
	*/
// 	void countConstSite();

	/**
		Number of states.
	*/
	int num_states;

	/**
		Fraction of constant sites.
	*/
	double frac_const_sites;


private:

	/**
		Sequence names.
	*/
	vector<string> seq_names;

	/**
		Pattern index of each site.
	*/
	vector<int> site_pattern;

	/**
		Map from a pattern to its index in the vector of patterns (the alignment).
	*/
	PatternIntMap pattern_index;

	/**
		Get the appearance of a state, helpful for ambiguous states
		(disabled in this copy).
		@param state the state index
		@param state_app (OUT) state appearance
	*/
// 	void getAppearance(char state, double *state_app);
};

#endif
