#include "SWCuda.h"


//#include "Defines.cuh"
// Very low score sentinel; presumably used by the kernels pulled in from
// Scores.cu as a "minus infinity" that still fits in a short -- TODO confirm.
#define short_min -16000
// Number of short values stored per alignment in the position buffers
// (pos_cpu / pos_gpu are sized and indexed in multiples of this value).
#define result_number 4
// String terminator character.
#define line_end '\0'
// Slot indices inside one result_number-sized position record. The host code
// in this file reads the same slots through the literal offsets 0..3:
// slot 0 -> Align::PositionOffset, slot 1 -> Align::QStart,
// slot 2 -> Align::QEnd, slot 3 -> start offset into the alignment/CIGAR buffer.
#define ref_position 0
#define qstart 1
#define qend 2
#define alignment_offset 3
// NOTE(review): not referenced in this file; presumably parameter slots used
// by the device code -- confirm against Scores.cu.
#define param_best_read_index 0
#define param_best_ref_index 1


// Numeric CIGAR operation codes. Values 0..8 appear to follow the SAM/BAM
// operation order "MIDNSHP=X" -- confirm against the consumer of the CIGAR data.
#define CIGAR_M 0
#define CIGAR_I 1
#define CIGAR_D 2
#define CIGAR_N 3
#define CIGAR_S 4
#define CIGAR_H 5
#define CIGAR_P 6
#define CIGAR_EQ 7
#define CIGAR_X 8
// Marker value; Set_Meme_Alginment() fills matrix_gpu with this byte.
#define CIGAR_STOP 10


#include "Scores.cu"

//#include "Cigar.cu"
//#include "SwAlignment.cu"



/**
 * Allocates the buffers used in scoring mode.
 *
 * Host side: page-locked, write-combined staging buffers for the reference
 * sequences (scaff_cpu) and the reads (reads_cpu).
 * Device side: the raw reference buffer (scaffold_gpu), the buffer written by
 * interleaveSeq (scaff_dest_gpu), the read buffer (reads_gpu) and one float
 * per result slot (results_gpu).
 *
 * Sizes are taken from mem_scaff, mem_reads and mem_result, which must be set
 * before this function is called.
 */
void SWCuda::Allocate_Scoring() {
	cudaHostAlloc(&scaff_cpu,(size_t)mem_scaff*sizeof(char),cudaHostAllocWriteCombined);
	cudaHostAlloc(&reads_cpu,(size_t)mem_reads*sizeof(char),cudaHostAllocWriteCombined);
	// Check the pinned host allocations separately so a failure is attributed
	// to the right step (same pattern as Allocate_Alginment).
	checkCUDAError("after allocate host (scoring)");

	cudaMalloc((void**) &scaff_dest_gpu, mem_scaff * sizeof(char));
	cudaMalloc((void**) &scaffold_gpu, mem_scaff * sizeof(char));
	cudaMalloc((void**) &reads_gpu, mem_reads * sizeof(char));

	cudaMalloc((void**) &results_gpu, mem_result * sizeof(float));
	// The previous label said "after memcpy" although no copy happens here.
	checkCUDAError("after allocate scoring");
}

/**
 * Allocates every host and device buffer needed in alignment (backtracking)
 * mode. All sizes come from the mem_* members, which the caller has to set
 * before invoking this function.
 */
void SWCuda::Allocate_Alginment() {
	const size_t scaffBytes = (size_t) mem_scaff * sizeof(char);
	const size_t readBytes = (size_t) mem_reads * sizeof(char);

	// Page-locked, write-combined host staging buffers for the sequences.
	cudaHostAlloc(&scaff_cpu, scaffBytes, cudaHostAllocWriteCombined);
	cudaHostAlloc(&reads_cpu, readBytes, cudaHostAllocWriteCombined);
	checkCUDAError("after allocate1");

	// Ordinary heap buffers that receive the results copied back from the GPU.
	pos_cpu = new short[mem_pos];
	results_cigar_cpu = new short[mem_result_cigar];
	results_Alignment_cpu = new char[mem_result];

	// Reference sequences: raw upload buffer and the interleaveSeq output.
	cudaMalloc((void**) &scaff_dest_gpu, scaffBytes);
	cudaMalloc((void**) &scaffold_gpu, scaffBytes);
	checkCUDAError("after allocate2");

	// Reads and the per-alignment matrix.
	cudaMalloc((void**) &reads_gpu, readBytes);
	cudaMalloc((void**) &matrix_gpu, mem_matrix * sizeof(char));
	checkCUDAError("after allocate3");

	// Position records and the textual alignment output.
	cudaMalloc((void**) &pos_gpu, mem_pos * sizeof(short));
	cudaMalloc((void**) &results_Alignment_gpu, mem_result * sizeof(char));
	checkCUDAError("after allocate4");

	// CIGAR output.
	cudaMalloc((void**) &results_cigar_gpu, mem_result_cigar * sizeof(short));
	checkCUDAError("after allocate5");
}




/**
 * Releases the buffers of the currently active mode.
 *
 * The sequence buffers exist in both modes and are always released. Which
 * result buffers are released depends on `type`: true means scoring mode,
 * false means alignment mode.
 */
void SWCuda::FreeMem() {
	// Device and pinned host sequence buffers, common to both modes.
	cudaFree(reads_gpu);
	cudaFree(scaffold_gpu);
	cudaFree(scaff_dest_gpu);
	cudaFreeHost(scaff_cpu);
	cudaFreeHost(reads_cpu);
	checkCUDAError("after delete2");

	if (!type) {
		// Alignment mode: matrix, one of the two result buffers, positions.
		cudaFree(matrix_gpu);
		if (cigar) {
			cudaFree(results_cigar_gpu);
		} else {
			cudaFree(results_Alignment_gpu);
		}
		cudaFree(pos_gpu);
		checkCUDAError("after delete1");

		// Host-side result arrays allocated with new[].
		delete[] results_cigar_cpu;
		delete[] results_Alignment_cpu;
		delete[] pos_cpu;
	} else {
		// Scoring mode only owns the score buffer.
		cudaFree(results_gpu);
	}

	checkCUDAError("after delete free mem");
}


/**
 * Final teardown, called from the destructor: releases all buffers of the
 * active mode, destroys both streams and resets the device.
 *
 * `type` selects which mode-specific buffers exist: true is scoring mode,
 * false is alignment mode.
 */
void SWCuda::threadExit() {
	// Sequence buffers exist in both modes.
	cudaFree(reads_gpu);
	cudaFree(scaffold_gpu);
	cudaFree(scaff_dest_gpu);

	cudaFreeHost(scaff_cpu);
	cudaFreeHost(reads_cpu);
	checkCUDAError("after delete host");
	if (type) {
		cudaFree(results_gpu);
	} else {
		cudaFree(matrix_gpu);
		// SetForBacktracking() gives mem_result_cigar a non-zero size exactly
		// when the CIGAR buffer is the one in use, so that is the buffer to
		// release here (the branches used to be swapped, unlike FreeMem()).
		if(mem_result_cigar!=0){
			cudaFree(results_cigar_gpu);
		}else{
			cudaFree(results_Alignment_gpu);
		}
		cudaFree(pos_gpu);

		// Host arrays created with new[] in Allocate_Alginment(); FreeMem()
		// releases them as well, previously they leaked on this path.
		delete[] results_cigar_cpu;
		delete[] results_Alignment_cpu;
		delete[] pos_cpu;
	}
	cudaStreamDestroy(stream[0]);
	cudaStreamDestroy(stream[1]);
	checkCUDAError("after delete end");
	// Legacy name; the CUDA runtime documents it as identical to cudaDeviceReset().
	cudaThreadExit();
}


/**
 * Builds the aligner for the given GPU.
 *
 * Reads the scoring parameters from Config, derives the batch sizes for
 * alignment and scoring mode, switches the object into scoring mode
 * (allocating its buffers), uploads the scoring parameters and the length
 * settings to the device symbols and finally creates the two streams.
 *
 * @param gpu_id forwarded to the Alignment base class.
 */
SWCuda::SWCuda(int gpu_id):Alignment(gpu_id){
	// Scoring parameters are configured as floats but used as shorts on the device.
	short matchScore = (short)Config.GetFloat("score_match");
	short mismatchScore = (short)Config.GetFloat("score_mismatch");
	short gapReadScore = (short)Config.GetFloat("score_gap_read");
	short gapRefScore = (short)Config.GetFloat("score_gap_ref");

	mem_pos = result_number;

	// Memory needed per alignment: matrix, read, two reference copies,
	// position record and the alignment/CIGAR output.
	long bytesPerThread = (sizeof(char) * (corridor_length + 1) * (read_length + 1))
			+ (read_length * sizeof(char))
			+ (2*ref_length * sizeof(char))
			+ (result_number * sizeof(short))
			+ (alignment_length  * 2 * sizeof(short));
	align_batch_size = calc_batchsize(bytesPerThread);
	// Remember the value of `blocks` as it is right after calc_batchsize().
	block_size_align = blocks;

	// Memory needed per score: read, one float result, two reference copies.
	mem_result = 1;
	bytesPerThread = read_length * sizeof(char) + mem_result* sizeof(float) + 2*ref_length * sizeof(char);
	score_batch_size = calc_batchsize(bytesPerThread);
	block_size_score = blocks;

	type = true;

	step_count = Config.GetInt("step_count");
	if (step_count == 0) {
		// A configured value of 0 is treated as 1.
		step_count = 1;
	}

	// Start in scoring mode; this allocates and initialises the buffers.
	SetForScoreing();

	// Upload the scoring parameters.
	cudaMemcpyToSymbol(match, &matchScore, sizeof(short));
	cudaMemcpyToSymbol(mismatch, &mismatchScore, sizeof(short));
	cudaMemcpyToSymbol(gapread, &gapReadScore, sizeof(short));
	cudaMemcpyToSymbol(gapref, &gapRefScore, sizeof(short));

	// Upload the length settings.
	cudaMemcpyToSymbol(corr_len, &corridor_length, sizeof(short));
	cudaMemcpyToSymbol(read_len, &read_length, sizeof(short));
	cudaMemcpyToSymbol(ref_len, &ref_length, sizeof(short));
	cudaMemcpyToSymbol(alignment_length_gpu, &alignment_length, sizeof(short));
	checkCUDAError("init");

	cudaStreamCreate(&stream[0]);
	cudaStreamCreate(&stream[1]);
}

/** Returns the scoring batch size multiplied by the configured step count. */
int SWCuda::GetScoreBatchSize() {
	return step_count * score_batch_size;
}
/** Returns the alignment batch size multiplied by the configured step count. */
int SWCuda::GetAlignBatchSize() {
	return step_count * align_batch_size;
}

/** Destructor; all cleanup is delegated to threadExit(). */
SWCuda::~SWCuda(){
	this->threadExit();
}


//SETTER:

/**
 * Resets the device buffers used in scoring mode: the sequence buffers are
 * filled with the byte value 5, the score buffer is zeroed.
 */
void SWCuda::Set_Meme_Scoring() {
	const int fillByte = '\5';
	const size_t scaffBytes = mem_scaff * sizeof(char);

	cudaMemset(scaff_dest_gpu, fillByte, scaffBytes);
	cudaMemset(results_gpu, 0, mem_result * sizeof(float));
	cudaMemset(scaffold_gpu, fillByte, scaffBytes);
	cudaMemset(reads_gpu, fillByte, mem_reads * sizeof(char));
	checkCUDAError("set mem");
}
/**
 * Resets the buffers used in alignment mode to their initial contents.
 */
void SWCuda::Set_Meme_Alginment() {
	const int fillByte = '\5';
	const size_t scaffBytes = mem_scaff * sizeof(char);
	const size_t alignBytes = mem_result * sizeof(char);

	// Host-side alignment text starts out as blanks.
	memset(results_Alignment_cpu, ' ', alignBytes);

	// Sequence buffers are filled with the byte value 5.
	cudaMemset(scaff_dest_gpu, fillByte, scaffBytes);
	cudaMemset(scaffold_gpu, fillByte, scaffBytes);
	cudaMemset(reads_gpu, fillByte, mem_reads * sizeof(char));

	// Result buffers are zeroed.
	cudaMemset(results_cigar_gpu, 0, mem_result_cigar * sizeof(short));
	cudaMemset(results_Alignment_gpu, '\0', alignBytes);

	// cudaMemset works byte-wise, so every byte of each short becomes 9.
	cudaMemset(pos_gpu, 9, mem_pos * sizeof(short));

	// Every matrix cell starts as CIGAR_STOP.
	cudaMemset(matrix_gpu, CIGAR_STOP, mem_matrix * sizeof(char));
}


/**
 * Switches the object into scoring mode.
 *
 * If alignment mode is currently active its buffers are released first. The
 * launch configuration and buffer sizes are then set for scoring, and the
 * scoring buffers are allocated and initialised.
 */
void SWCuda::SetForScoreing() {
	// type == false means alignment-mode buffers are currently allocated.
	if (!type) {
		FreeMem();
	}
	type = true;

	// Launch configuration determined for scoring in the constructor.
	blocks = block_size_score;
	batch_size = score_batch_size;

	// Buffer sizes (in elements) for one batch.
	mem_reads = read_length*batch_size;
	mem_scaff = ref_length*batch_size;
	mem_result = batch_size;
	mem_matrix = 0;

	// Dynamic shared memory per block: corridor_length shorts per thread.
	shared_mem = threads*(corridor_length) * sizeof(short);

	Allocate_Scoring();
	Set_Meme_Scoring();
	checkCUDAError("init");
}

/**
 * Switches the object into alignment (backtracking) mode.
 *
 * If scoring mode is currently active its buffers are released first. The
 * launch configuration and buffer sizes are then set for alignment, and the
 * alignment buffers are allocated and initialised.
 */
void SWCuda::SetForBacktracking() {
	// type == true means scoring-mode buffers are currently allocated.
	if (type) {
		FreeMem();
	}
	type = false;

	// NOTE(review): the value stored in batch_size here is overwritten with
	// align_batch_size just below; the call is kept because calc_batchsize()
	// may have side effects that are not visible in this file.
	long bytesPerThread = (sizeof(char) * (corridor_length + 1) * (read_length + 1))
			+ (read_length * sizeof(char))
			+ (2*ref_length * sizeof(char))
			+ (result_number * sizeof(short))
			+ (alignment_length  * 2 * sizeof(short));
	batch_size = calc_batchsize(bytesPerThread);

	// Launch configuration determined for alignment in the constructor.
	blocks = block_size_align;
	batch_size = align_batch_size;

	// Buffer sizes (in elements) for one batch.
	mem_reads = read_length*batch_size;
	mem_scaff = ref_length*batch_size;
	mem_pos = result_number * batch_size;

	// Only one of the two result buffers gets a non-zero size.
	mem_result = 0;
	mem_result_cigar = 0;
	if (cigar) {
		mem_result_cigar = alignment_length  * 2* batch_size;
	} else {
		mem_result = alignment_length * 2* batch_size;
	}

	mem_matrix = (corridor_length + 1) * (read_length+ 1)*batch_size;

	// Dynamic shared memory per block: corridor_length shorts per thread.
	shared_mem = threads*(corridor_length) * sizeof(short);

	Allocate_Alginment();
	Set_Meme_Alginment();
	checkCUDAError("init");
}


/**
 * Scores batchSize reference/read pairs with the SW_Cuda kernel.
 *
 * The input is processed in chunks of batch_size pairs. References travel on
 * stream[0] (upload + interleaveSeq), reads and the scoring kernel on
 * stream[1], so the next chunk's references can be uploaded while the current
 * chunk is being scored.
 *
 * @param batchSize  number of pairs to score
 * @param refSeqList reference sequences, indexed up to batchSize
 * @param qrySeqList read sequences, indexed up to batchSize
 * @param results    output array handed to CopyfromDevice() chunk by chunk
 * @return batchSize
 */
int SWCuda::CalcScores_SW(int const batchSize, char const * const * const refSeqList, char const * const * const qrySeqList, float * const results) {
	dim3 dimBlock(threads, 1);
	dim3 dimGrid(blocks, 1);

	// Number of pairs that still have to be processed.
	int remaining = batchSize;

	// Stage and interleave the references of the first chunk.
	Prepare(refSeqList, scaff_cpu, ref_length, min(remaining, batch_size));
	cudaMemcpyAsync(scaffold_gpu, scaff_cpu, mem_scaff * sizeof(char), cudaMemcpyHostToDevice, stream[0]);
	interleaveSeq<<<dimGrid, dimBlock, 0, stream[0]>>>(scaffold_gpu, scaff_dest_gpu);

	for (int offset = 0; offset < batchSize; offset += batch_size) {
		// Upload the reads of this chunk.
		Prepare(&qrySeqList[offset], reads_cpu, read_length, min(remaining, batch_size));
		cudaMemcpyAsync(reads_gpu, reads_cpu, mem_reads * sizeof(char), cudaMemcpyHostToDevice, stream[1]);

		// The interleaved references must be complete before scoring starts.
		cudaStreamSynchronize(stream[0]);
		SW_Cuda<<<dimGrid, dimBlock, shared_mem, stream[1]>>>(scaff_dest_gpu, reads_gpu, results_gpu);

		remaining -= batch_size;
		bool const moreChunks = remaining > 0;

		if (moreChunks) {
			// Upload the next chunk's references while the kernel is running.
			Prepare(&refSeqList[offset + batch_size], scaff_cpu, ref_length, min(remaining, batch_size));
			cudaMemcpyAsync(scaffold_gpu, scaff_cpu, mem_scaff * sizeof(char), cudaMemcpyHostToDevice, stream[0]);
		}

		cudaStreamSynchronize(stream[1]);
		// remaining + batch_size is the number of pairs that were open before this chunk.
		CopyfromDevice(&results[offset], min(remaining + batch_size, batch_size));

		if (moreChunks) {
			// scaff_dest_gpu may only be overwritten once scoring has finished.
			interleaveSeq<<<dimGrid, dimBlock, 0, stream[0]>>>(scaffold_gpu, scaff_dest_gpu);
		}
	}
	checkCUDAError("after calc SW score");

	return batchSize;
}

/**
 * Scores batchSize reference/read pairs with the NW_Cuda kernel.
 *
 * The input is processed in chunks of batch_size pairs. References travel on
 * stream[0] (upload + interleaveSeq), reads and the scoring kernel on
 * stream[1], so the next chunk's references can be uploaded while the current
 * chunk is being scored.
 *
 * @param batchSize  number of pairs to score
 * @param refSeqList reference sequences, indexed up to batchSize
 * @param qrySeqList read sequences, indexed up to batchSize
 * @param results    output array handed to CopyfromDevice() chunk by chunk
 * @return batchSize
 */
int SWCuda::CalcScores_NW(int const batchSize, char const * const * const refSeqList, char const * const * const qrySeqList, float * const results) {
	dim3 dimBlock(threads, 1);
	dim3 dimGrid(blocks, 1);

	// Number of pairs that still have to be processed.
	int remaining = batchSize;

	// Stage and interleave the references of the first chunk.
	Prepare(refSeqList, scaff_cpu, ref_length, min(remaining, batch_size));
	cudaMemcpyAsync(scaffold_gpu, scaff_cpu, mem_scaff * sizeof(char), cudaMemcpyHostToDevice, stream[0]);
	interleaveSeq<<<dimGrid, dimBlock, 0, stream[0]>>>(scaffold_gpu, scaff_dest_gpu);

	for (int offset = 0; offset < batchSize; offset += batch_size) {
		// Upload the reads of this chunk.
		Prepare(&qrySeqList[offset], reads_cpu, read_length, min(remaining, batch_size));
		cudaMemcpyAsync(reads_gpu, reads_cpu, mem_reads * sizeof(char), cudaMemcpyHostToDevice, stream[1]);

		// The interleaved references must be complete before scoring starts.
		cudaStreamSynchronize(stream[0]);
		NW_Cuda<<<dimGrid, dimBlock, shared_mem, stream[1]>>>(scaff_dest_gpu, reads_gpu, results_gpu);

		remaining -= batch_size;
		bool const moreChunks = remaining > 0;

		if (moreChunks) {
			// Upload the next chunk's references while the kernel is running.
			Prepare(&refSeqList[offset + batch_size], scaff_cpu, ref_length, min(remaining, batch_size));
			cudaMemcpyAsync(scaffold_gpu, scaff_cpu, mem_scaff * sizeof(char), cudaMemcpyHostToDevice, stream[0]);
		}

		cudaStreamSynchronize(stream[1]);
		// remaining + batch_size is the number of pairs that were open before this chunk.
		CopyfromDevice(&results[offset], min(remaining + batch_size, batch_size));

		if (moreChunks) {
			// scaff_dest_gpu may only be overwritten once scoring has finished.
			interleaveSeq<<<dimGrid, dimBlock, 0, stream[0]>>>(scaffold_gpu, scaff_dest_gpu);
		}
	}
	checkCUDAError("after calc NW score");

	return batchSize;
}

/**
 * Computes textual alignments for batchSize reference/read pairs using the
 * CUDASW_Score and CUDASW_Backtracking kernels.
 *
 * The input is processed in chunks of batch_size pairs. References travel on
 * stream[0] (upload + interleaveSeq), reads and the alignment kernels on
 * stream[1]. After each chunk the aligned strings are copied into
 * results[k].pRef / results[k].pQry and PositionOffset, QStart, QEnd and
 * Identity are filled in.
 *
 * @param batchSize  number of pairs to align
 * @param refSeqList reference sequences, indexed up to batchSize
 * @param qrySeqList read sequences, indexed up to batchSize
 * @param results    one Align per pair; pRef and pQry must already point to
 *                   writable buffers
 * @return batchSize
 */
int SWCuda::Calc_Alignment_SW(int const batchSize, char const * const * const refSeqList, char const * const * const qrySeqList, Align * results) {

	dim3 dimBlock(threads, 1);
	dim3 dimGrid(blocks, 1);
	// Number of pairs that still have to be processed.
	int batch =batchSize;

	// Stage and interleave the references of the first chunk.
	Prepare(refSeqList,scaff_cpu,ref_length, min(batch,batch_size));
	cudaMemcpyAsync(scaffold_gpu, scaff_cpu, mem_scaff * sizeof(char), cudaMemcpyHostToDevice,stream[0]);
	interleaveSeq<<<dimGrid,dimBlock,0,stream[0]>>>(scaffold_gpu,scaff_dest_gpu);

	for (int i = 0; i<batchSize; i += batch_size) {
		Prepare(&qrySeqList[i],reads_cpu,read_length, min(batch,batch_size));
		cudaMemcpyAsync(reads_gpu, reads_cpu, mem_reads * sizeof(char), cudaMemcpyHostToDevice,stream[1]);
		// The interleaved references must be complete before scoring starts.
		cudaStreamSynchronize(stream[0]);
		CUDASW_Score<<<dimGrid,dimBlock,shared_mem,stream[1]>>>(scaff_dest_gpu,reads_gpu, pos_gpu,matrix_gpu);

		if (batch-batch_size>0){
			// Upload the next chunk's references while the kernel is running.
			Prepare(&refSeqList[i+batch_size],scaff_cpu,ref_length, min(batch-batch_size,batch_size));
			cudaMemcpyAsync(scaffold_gpu, scaff_cpu, mem_scaff * sizeof(char), cudaMemcpyHostToDevice,stream[0]);
		}
		cudaStreamSynchronize(stream[1]);
		CUDASW_Backtracking<<<dimGrid,dimBlock,2*threads*sizeof(short),stream[1]>>>(scaff_dest_gpu, reads_gpu,pos_gpu,matrix_gpu,results_Alignment_gpu);

		cudaStreamSynchronize(stream[1]);
		CopyfromDevice_Alignemt();
		if(batch-batch_size>0){
			// scaff_dest_gpu may only be overwritten once backtracking has finished.
			interleaveSeq<<<dimGrid,dimBlock,0,stream[0]>>>(scaffold_gpu,scaff_dest_gpu);
		}


		for (int j = 0; j < min(batch,batch_size); ++j) {
			float total = 0.0f;
			float matches = 0.0f;
			char * read = results[i+j].pQry;
			char * ref = results[i+j].pRef;
			// Per alignment: alignment_length reference characters followed by
			// alignment_length read characters.
			char * tempAlign = results_Alignment_cpu + j * alignment_length * 2;

			int index = 0;
			// Slot 3 of the position record holds the index just before the
			// first used character of the alignment.
			for (int t = pos_cpu[result_number * j + 3] + 1; t < alignment_length; ++t) {
				ref[index] = tempAlign[t];
				read[index] = tempAlign[t + alignment_length];

				if (read[index] != ' ' && read[index] != '-') {
					total++;
				}
				if (read[index] != ' ' && read[index] == ref[index]) {
					matches++;
				}
				index += 1;
			}
			results[i+j].PositionOffset = pos_cpu[result_number * j];
			results[i+j].QStart = pos_cpu[result_number * j + 1];
			results[i+j].QEnd = pos_cpu[result_number * j + 2];
			// An alignment without any read base would otherwise divide by
			// zero and store NaN/inf; report an identity of 0 instead.
			results[i+j].Identity = (total > 0.0f) ? (matches / total) : 0.0f;

		}
		batch-=batch_size;
	}

	checkCUDAError("ende align");

	return batchSize;
}


/**
 * Computes CIGAR-based alignments for batchSize reference/read pairs using
 * the CUDASW_Score and CUDASW_Backtracking_CIGAR kernels.
 *
 * The input is processed in chunks of batch_size pairs. References travel on
 * stream[0] (upload + interleaveSeq), reads and the alignment kernels on
 * stream[1]. After each chunk the raw CIGAR data and the position records are
 * copied back and handed to computeCigarMD() for every pair.
 *
 * @param batchSize  number of pairs to align
 * @param refSeqList reference sequences, indexed up to batchSize
 * @param qrySeqList read sequences, indexed up to batchSize
 * @param results    one Align per pair, filled by computeCigarMD()
 * @return batchSize
 */
int SWCuda::Calc_Alignment_SW_cigar(int const batchSize, char const * const * const refSeqList, char const * const * const qrySeqList, Align * results) {
	dim3 dimBlock(threads, 1);
	dim3 dimGrid(blocks, 1);

	// Number of pairs that still have to be processed.
	int remaining = batchSize;

	// Stage and interleave the references of the first chunk.
	Prepare(refSeqList, scaff_cpu, ref_length, min(remaining, batch_size));
	cudaMemcpyAsync(scaffold_gpu, scaff_cpu, mem_scaff * sizeof(char), cudaMemcpyHostToDevice, stream[0]);
	interleaveSeq<<<dimGrid, dimBlock, 0, stream[0]>>>(scaffold_gpu, scaff_dest_gpu);

	for (int offset = 0; offset < batchSize; offset += batch_size) {
		// Upload the reads of this chunk.
		Prepare(&qrySeqList[offset], reads_cpu, read_length, min(remaining, batch_size));
		cudaMemcpyAsync(reads_gpu, reads_cpu, mem_reads * sizeof(char), cudaMemcpyHostToDevice, stream[1]);

		// The interleaved references must be complete before scoring starts.
		cudaStreamSynchronize(stream[0]);
		CUDASW_Score<<<dimGrid, dimBlock, shared_mem, stream[1]>>>(scaff_dest_gpu, reads_gpu, pos_gpu, matrix_gpu);

		bool const moreChunks = remaining - batch_size > 0;
		if (moreChunks) {
			// Upload the next chunk's references while the kernels are running.
			Prepare(&refSeqList[offset + batch_size], scaff_cpu, ref_length, min(remaining - batch_size, batch_size));
			cudaMemcpyAsync(scaffold_gpu, scaff_cpu, mem_scaff * sizeof(char), cudaMemcpyHostToDevice, stream[0]);
		}

		// Queued behind CUDASW_Score on the same stream.
		CUDASW_Backtracking_CIGAR<<<dimGrid, dimBlock, 2*threads*sizeof(short), stream[1]>>>(scaff_dest_gpu, reads_gpu, pos_gpu, matrix_gpu, results_cigar_gpu);

		cudaStreamSynchronize(stream[1]);
		cudaMemcpy(results_cigar_cpu, results_cigar_gpu, mem_result_cigar * sizeof(short), cudaMemcpyDeviceToHost);
		cudaMemcpy(pos_cpu, pos_gpu, mem_pos * sizeof(short), cudaMemcpyDeviceToHost);

		if (moreChunks) {
			// scaff_dest_gpu may only be overwritten once backtracking has finished.
			interleaveSeq<<<dimGrid, dimBlock, 0, stream[0]>>>(scaffold_gpu, scaff_dest_gpu);
		}

		for (int k = 0; k < min(batch_size, remaining); ++k) {
			// Position record and raw CIGAR data of pair k within this chunk.
			short * const record = pos_cpu + result_number * k;
			short * const rawCigar = results_cigar_cpu + k * alignment_length * 2;
			Align & hit = results[offset + k];

			// record[0] is the offset into the reference, record[3] the start
			// offset into the raw CIGAR data.
			computeCigarMD(hit, record[3], rawCigar, refSeqList[offset + k] + record[0]);
			hit.PositionOffset = record[0];
		}
		remaining -= batch_size;
	}

	checkCUDAError("after calc SW cigar");

	return batchSize;
}

/**
 * Computes textual alignments for batchSize reference/read pairs using the
 * CUDANW_Score kernel followed by the CUDASW_Backtracking kernel.
 *
 * The input is processed in chunks of batch_size pairs. References travel on
 * stream[0] (upload + interleaveSeq), reads and the alignment kernels on
 * stream[1]. After each chunk the aligned strings are copied into
 * results[k].pRef / results[k].pQry and PositionOffset, QStart, QEnd and
 * Identity are filled in.
 *
 * @param batchSize  number of pairs to align
 * @param refSeqList reference sequences, indexed up to batchSize
 * @param qrySeqList read sequences, indexed up to batchSize
 * @param results    one Align per pair; pRef and pQry must already point to
 *                   writable buffers
 * @return batchSize
 */
int SWCuda::Calc_Alignment_NW(int const batchSize, char const * const * const refSeqList, char const * const * const qrySeqList, Align * results) {

	dim3 dimBlock(threads, 1);
	dim3 dimGrid(blocks, 1);
	// Number of pairs that still have to be processed.
	int batch =batchSize;

	// Stage and interleave the references of the first chunk.
	Prepare(refSeqList,scaff_cpu,ref_length, min(batch,batch_size));
	cudaMemcpyAsync(scaffold_gpu, scaff_cpu, mem_scaff * sizeof(char), cudaMemcpyHostToDevice,stream[0]);
	interleaveSeq<<<dimGrid,dimBlock,0,stream[0]>>>(scaffold_gpu,scaff_dest_gpu);

	for (int i = 0; i<batchSize; i += batch_size) {
		Prepare(&qrySeqList[i],reads_cpu,read_length, min(batch,batch_size));
		cudaMemcpyAsync(reads_gpu, reads_cpu, mem_reads * sizeof(char), cudaMemcpyHostToDevice,stream[1]);

		// The interleaved references must be complete before scoring starts.
		cudaStreamSynchronize(stream[0]);
		CUDANW_Score<<<dimGrid,dimBlock,shared_mem,stream[1]>>>(scaff_dest_gpu,reads_gpu, pos_gpu,matrix_gpu);

		if (batch-batch_size>0){
			// Upload the next chunk's references while the kernel is running.
			Prepare(&refSeqList[i+batch_size],scaff_cpu,ref_length, min(batch-batch_size,batch_size));
			cudaMemcpyAsync(scaffold_gpu, scaff_cpu, mem_scaff * sizeof(char), cudaMemcpyHostToDevice,stream[0]);
		}
		cudaStreamSynchronize(stream[1]);
		CUDASW_Backtracking<<<dimGrid,dimBlock,2*threads*sizeof(short),stream[1]>>>(scaff_dest_gpu, reads_gpu,pos_gpu,matrix_gpu,results_Alignment_gpu);

		cudaStreamSynchronize(stream[1]);
		CopyfromDevice_Alignemt();
		if(batch-batch_size>0){
			// scaff_dest_gpu may only be overwritten once backtracking has finished.
			interleaveSeq<<<dimGrid,dimBlock,0,stream[0]>>>(scaffold_gpu,scaff_dest_gpu);
		}


		for (int j = 0; j < min(batch,batch_size); ++j) {
			float total = 0.0f;
			float matches = 0.0f;
			char * read = results[i+j].pQry;
			char * ref = results[i+j].pRef;
			// Per alignment: alignment_length reference characters followed by
			// alignment_length read characters.
			char * tempAlign = results_Alignment_cpu + j * alignment_length * 2;

			int index = 0;
			// Slot 3 of the position record holds the index just before the
			// first used character of the alignment.
			for (int t = pos_cpu[result_number * j + 3] + 1; t < alignment_length; ++t) {
				ref[index] = tempAlign[t];
				read[index] = tempAlign[t + alignment_length];

				if (read[index] != ' ' && read[index] != '-') {
					total++;
				}
				if (read[index] != ' ' && read[index] == ref[index]) {
					matches++;
				}
				index += 1;
			}
			results[i+j].PositionOffset = pos_cpu[result_number * j];
			results[i+j].QStart = pos_cpu[result_number * j + 1];
			results[i+j].QEnd = pos_cpu[result_number * j + 2];
			// An alignment without any read base would otherwise divide by
			// zero and store NaN/inf; report an identity of 0 instead.
			results[i+j].Identity = (total > 0.0f) ? (matches / total) : 0.0f;

		}
		batch-=batch_size;
	}

	checkCUDAError("ende align");

	return batchSize;
}

/**
 * Computes CIGAR-based alignments for batchSize reference/read pairs using
 * the CUDANW_Score and CUDASW_Backtracking_CIGAR kernels.
 *
 * The input is processed in chunks of batch_size pairs. References travel on
 * stream[0] (upload + interleaveSeq), reads and the alignment kernels on
 * stream[1]. After each chunk the raw CIGAR data and the position records are
 * copied back and handed to computeCigarMD() for every pair.
 *
 * @param batchSize  number of pairs to align
 * @param refSeqList reference sequences, indexed up to batchSize
 * @param qrySeqList read sequences, indexed up to batchSize
 * @param results    one Align per pair, filled by computeCigarMD()
 * @return batchSize
 */
int SWCuda::Calc_Alignment_NW_cigar(int const batchSize, char const * const * const refSeqList, char const * const * const qrySeqList, Align * results) {
	dim3 dimBlock(threads, 1);
	dim3 dimGrid(blocks, 1);

	// Number of pairs that still have to be processed.
	int remaining = batchSize;

	// Stage and interleave the references of the first chunk.
	Prepare(refSeqList, scaff_cpu, ref_length, min(remaining, batch_size));
	cudaMemcpyAsync(scaffold_gpu, scaff_cpu, mem_scaff * sizeof(char), cudaMemcpyHostToDevice, stream[0]);
	interleaveSeq<<<dimGrid, dimBlock, 0, stream[0]>>>(scaffold_gpu, scaff_dest_gpu);

	for (int offset = 0; offset < batchSize; offset += batch_size) {
		// Upload the reads of this chunk.
		Prepare(&qrySeqList[offset], reads_cpu, read_length, min(remaining, batch_size));
		cudaMemcpyAsync(reads_gpu, reads_cpu, mem_reads * sizeof(char), cudaMemcpyHostToDevice, stream[1]);

		// The interleaved references must be complete before scoring starts.
		cudaStreamSynchronize(stream[0]);
		CUDANW_Score<<<dimGrid, dimBlock, shared_mem, stream[1]>>>(scaff_dest_gpu, reads_gpu, pos_gpu, matrix_gpu);

		bool const moreChunks = remaining - batch_size > 0;
		if (moreChunks) {
			// Upload the next chunk's references while the kernels are running.
			Prepare(&refSeqList[offset + batch_size], scaff_cpu, ref_length, min(remaining - batch_size, batch_size));
			cudaMemcpyAsync(scaffold_gpu, scaff_cpu, mem_scaff * sizeof(char), cudaMemcpyHostToDevice, stream[0]);
		}

		// Queued behind CUDANW_Score on the same stream.
		CUDASW_Backtracking_CIGAR<<<dimGrid, dimBlock, 2*threads*sizeof(short), stream[1]>>>(scaff_dest_gpu, reads_gpu, pos_gpu, matrix_gpu, results_cigar_gpu);

		cudaStreamSynchronize(stream[1]);
		cudaMemcpy(results_cigar_cpu, results_cigar_gpu, mem_result_cigar * sizeof(short), cudaMemcpyDeviceToHost);
		cudaMemcpy(pos_cpu, pos_gpu, mem_pos * sizeof(short), cudaMemcpyDeviceToHost);

		if (moreChunks) {
			// scaff_dest_gpu may only be overwritten once backtracking has finished.
			interleaveSeq<<<dimGrid, dimBlock, 0, stream[0]>>>(scaffold_gpu, scaff_dest_gpu);
		}

		for (int k = 0; k < min(batch_size, remaining); ++k) {
			// Position record and raw CIGAR data of pair k within this chunk.
			short * const record = pos_cpu + result_number * k;
			short * const rawCigar = results_cigar_cpu + k * alignment_length * 2;
			Align & hit = results[offset + k];

			// record[0] is the offset into the reference, record[3] the start
			// offset into the raw CIGAR data.
			computeCigarMD(hit, record[3], rawCigar, refSeqList[offset + k] + record[0]);
			hit.PositionOffset = record[0];
		}
		remaining -= batch_size;
	}

	checkCUDAError("after calc NW cigar");

	return batchSize;
}


/**
 * Entry point for computing alignments of a batch of reference/read pairs.
 *
 * Switches the object into alignment mode if necessary and dispatches on the
 * low byte of `mode`: 0 selects the SW variants, 1 the NW variants. The
 * `cigar` member selects between CIGAR and textual output.
 *
 * @param mode       algorithm selector; only the low 8 bits are evaluated
 * @param batchSize  number of pairs; values <= 0 are rejected with a warning
 * @param refSeqList reference sequences
 * @param qrySeqList read sequences
 * @param results    one Align per pair, filled by the selected routine
 * @param extData    unused here
 * @return number of processed pairs, 0 for an empty batch or unsupported mode
 */
int SWCuda::BatchAlign(int const mode, int const batchSize, char const * const * const refSeqList, char const * const * const qrySeqList, Align * const results,void * extData) {

	if (batchSize <= 0) {
		Log.Warning("Align for batchSize <= 0");
		return 0;
	}

	// type == true means the scoring buffers are currently allocated.
	if (type) {
		SetForBacktracking();
	}

	switch (mode & 0xFF) {
		case 0:
			if(cigar){
				return Calc_Alignment_SW_cigar(batchSize, refSeqList, qrySeqList, results);
			}else{
				return Calc_Alignment_SW(batchSize, refSeqList, qrySeqList, results);
			}
		case 1:
			if(cigar){
				return Calc_Alignment_NW_cigar(batchSize, refSeqList, qrySeqList, results);
			}else{
				return Calc_Alignment_NW(batchSize, refSeqList, qrySeqList, results);
			}
		default:
			// Report the problem like BatchScore() does instead of failing silently.
			Log.Error("Unsupported alignment mode %i", mode & 0xFF);
			return 0;
	}

}

/**
 * Entry point for scoring a batch of reference/read pairs.
 *
 * Switches the object into scoring mode if necessary and dispatches on the
 * low byte of `mode`: 0 selects CalcScores_SW, 1 selects CalcScores_NW.
 *
 * @param mode       algorithm selector; only the low 8 bits are evaluated
 * @param batchSize  number of pairs; values <= 0 are rejected with a warning
 * @param refSeqList reference sequences
 * @param qrySeqList read sequences
 * @param results    output array for the scores
 * @param extData    unused here
 * @return number of processed pairs, 0 for an empty batch or unsupported mode
 */
int SWCuda::BatchScore(int const mode, int const batchSize, char const * const * const refSeqList, char const * const * const qrySeqList, float * const results,void * extData) {
	if (batchSize <= 0) {
		Log.Warning("Score for batchSize <= 0");
		return 0;
	}

	// type == false means the alignment buffers are currently allocated.
	if (!type) {
		SetForScoreing();
	}

	int const algorithm = mode & 0xFF;
	if (algorithm == 0) {
		return CalcScores_SW(batchSize, refSeqList, qrySeqList, results);
	}
	if (algorithm == 1) {
		return CalcScores_NW(batchSize, refSeqList, qrySeqList, results);
	}

	Log.Error("Unsupported alignment mode %i", algorithm);
	return 0;
}

