#!/bin/python
import sys
import csv
import cPickle as pickle
from optparse import OptionParser
import os
import shutil
import aboss
from subprocess import Popen, PIPE

# Start-up check: try to launch "hmmscan -h" once.  Popen raises OSError when
# the executable cannot be started, in which case an install hint is printed
# and the script stops before the aboss modules are imported.
try:
    command = ["hmmscan", "-h"]
    process = Popen(command, stdout=PIPE, stderr=PIPE)
    # Both pipes are drained so the child can finish; the output is not inspected
    _, pr_stderr = process.communicate()
except OSError:
    print("You do not have HMMSCAN in your PATH variable installed! Please download HMMSCAN Version 3.1b2 from http://hmmer.org/download.html and put it in your PATH, Thank you :)")
    sys.exit(1)

from aboss.run_ANARCI_parsing import Main, list_of_frames
from aboss.IMGT_germline_reference import CreateIMGTReference
from aboss.reference_matrix import CreateRefMatrix, calculateResidueErrorRate 
from aboss.flagging_residue_position import FlagResiduePositions
from aboss.plotting import PlotGraps
from aboss.redundancy_plot import PlotRedundancyPlot,calculate_filtered_reads
from aboss.run_fasta import extract_aa, checkFasta
from aboss.write_to_csv import writeOutputToCSV
from aboss.common import QueredAmino, check_tempCSV, extractSequences, parse, openIgBlastn, checkIgblastn

# Short version string; passed to OptionParser(version=...) in the entry point
VERSION = "ABOSS Version 1.1 "

# Banner printed ahead of the help text when no input file is given
longVersion = """

|-----------------#   ABOSS   #-----------------|
|                                               |
|  Filtering Next-Generation Sequencing of the  |
|    Ig Gene Repertoire Data Using Antibody     |
|           Structural Information              |
|                                               |
|    Developed by Aleksandr Kovaltsuk, 2018     |
|            Department of Statistics,          |
|               University of Oxford            |
|            <kovaltsuk@stats.ox.ac.uk>         |
|-----------------------------------------------|
"""

class InitiateABOSS(object):
    """
    Driver that holds the state and the stages of one ABOSS analysis.

    Every stage is a method that stores its results on the instance so that
    later stages can read them.  The command-line entry point calls them in
    this order: openFile, anarciParsing, createIMGTRef, referenceMatrix,
    flaggingResiduePosition, plotting, calculateOutputs, writingOutputs,
    cleanUp.

    Status messages use the single-argument, parenthesised form of print,
    which gives the same output whether print is a statement or a function.
    """

    # File-name endings accepted as FASTA during format auto-detection
    _FASTA_EXTENSIONS = (".fasta", ".fa")

    # Directories deleted by cleanUp
    _TEMP_DIRECTORIES = ("Reference_matrix", "Temp_CSV")

    def __init__(self, fileName, rate_of_analysis, chain, ncpu, DataFormat, species):
        """
        Store the run settings.

        fileName         -- path of the input file
        rate_of_analysis -- value handed to the ANARCI parser (the -r/--rate option)
        chain            -- antibody chain; also used as key into list_of_frames
        ncpu             -- number of CPUs handed to parsing and flagging
        DataFormat       -- "Igblastn", "Fasta" or "Pickle"; any other value
                            (including None) triggers format auto-detection
        species          -- species name handed to ANARCI parsing and to the
                            IMGT reference builder
        """
        self.fileName = fileName
        self.rate_of_analysis = rate_of_analysis
        self.chain = chain
        self.ncpu = ncpu
        self.DataFormat = DataFormat
        self.species = species

    def status(self):
        """
        Print the input file, chain, CPU count, file format and species of this run
        """
        print(" \nInput file name: {0}".format(self.fileName))
        print("   Antibody chain: {0}".format(self.chain))
        print("    Number of CPU: {0}".format(self.ncpu))
        print("Input File Format: {0}".format(self.DataFormat))
        print("          Species: {0}\n".format(self.species))

    def openFile(self):
        """
        Read the input file into self.final_dict.

        The reader is chosen from self.DataFormat ("Igblastn", "Pickle" or
        "Fasta").  For any other value the format is auto-detected; when
        detection fails the program exits with status 1.
        """
        if self.DataFormat == "Igblastn":
            self.final_dict = openIgBlastn(self.fileName)

        elif self.DataFormat == "Pickle":
            self.final_dict = self._loadPickle()

        elif self.DataFormat == "Fasta":
            self.final_dict = extract_aa(self.fileName)

        else:
            self._detectFormatAndOpen()

    def _loadPickle(self):
        """
        Return the object stored in the pickle file self.fileName
        """
        # NOTE(security): unpickling can execute arbitrary code, so only
        # pickle files from a trusted source should be given to ABOSS.
        with open(self.fileName, "rb") as handle:
            return pickle.load(handle)

    def _detectFormatAndOpen(self):
        """
        Try FASTA, then Pickle, then Igblastn; the first format that fits
        fills self.final_dict.  Exits with status 1 when none of them fits.
        """
        print("Format was not specified! Trying to identify if Igblastn, Fasta or Pickle\n")

        # Only the END of the name counts as an extension: a substring test
        # would also match names such as "reads.family.pkl".
        if self.fileName.lower().endswith(self._FASTA_EXTENSIONS):
            print("File format is FASTA")
            self.final_dict = extract_aa(self.fileName)
            return

        # A failing probe just means "not this format", so errors are
        # swallowed on purpose; "except Exception" still lets
        # KeyboardInterrupt and SystemExit through.
        try:
            if checkFasta(self.fileName):
                print("File format is FASTA")
                self.final_dict = extract_aa(self.fileName)
                return
        except Exception:
            pass

        # Checking for Pickle format: only a pickled dict is accepted
        try:
            candidate = self._loadPickle()
        except Exception:
            candidate = None
        if isinstance(candidate, dict):
            self.final_dict = candidate
            print("File format is Pickle")
            return

        # Checking for Igblastn
        try:
            if checkIgblastn(self.fileName):
                print("File format is Igblastn")
                self.final_dict = openIgBlastn(self.fileName)
                return
        except Exception:
            pass

        print("Format is not found!\nExiting...")
        sys.exit(1)

    def anarciParsing(self):
        """
        Function that starts ANARCI parsing
        Here we assess every sequence for structural viability as well as IMGT number these sequences

        Stores the observed V genes, observed J genes and the per-region
        amino acid counts returned by the parser on the instance.
        """
        # Check for a left-over Temp_CSV folder (handled by check_tempCSV)
        check_tempCSV()

        # Parsing starts here
        ANARCI = Main(self.final_dict, self.ncpu, self.rate_of_analysis, self.chain, self.species)
        # The first element of the result is not used by this pipeline
        _, self.observed_V_gene, self.observed_J_gene, self.regions_aaCount = ANARCI.run()

        print("\n\tParsing is Done")

    def createIMGTRef(self):
        """
        Function that uses observed V and J genes to create numbered IMGT reference

        Needs anarciParsing to have run; stores the result in self.IMGT_reference.
        """
        IMGTGermRef = CreateIMGTReference(self.species, self.chain, self.observed_V_gene, self.observed_J_gene)
        self.IMGT_reference = IMGTGermRef.reconstructAndNumber()
        print("\n\tIMGT germline reference is created\n")

    def referenceMatrix(self):
        """
        Function that creates the reference matrix for each antibody region

        Computes self.residueErrorRate first, then builds one matrix per
        frame listed in list_of_frames for the chosen chain.
        """
        self.residueErrorRate = calculateResidueErrorRate(self.regions_aaCount, self.chain)
        print("Residue Error rate: {0}".format(self.residueErrorRate))

        for frame in list_of_frames[self.chain]:
            matrix = CreateRefMatrix(self.chain, frame, self.regions_aaCount, self.IMGT_reference, self.residueErrorRate)
            matrix.create()

        print("\n\tReference Matrix has been created\n")

    def flaggingResiduePosition(self):
        """
        Flagging residue/positions

        Loads the reference matrices and flags the input sequences; the two
        results are stored in self.ABOSSParsed_pos_Count and self.ABOSSparsed.
        """
        print("\n\tFlagging residue/positions has started\n")

        Flags = FlagResiduePositions(self.ncpu, self.final_dict, self.chain)
        Flags.loadRefMatrix()
        self.ABOSSParsed_pos_Count, self.ABOSSparsed = Flags.flagsequences()

        print("\n\tFlagging residue/positions has finished\n")

    def plotting(self):
        """
        Plotting Graphs

        Plotting is best effort: a failing plot is reported together with
        the error text and the pipeline carries on.
        """
        print("\n\tPlotting has started \n")
        for frame in list_of_frames[self.chain]:
            try:
                print("\tPlotting:  {0}".format(frame))
                PlotGraps(frame, self.ABOSSParsed_pos_Count)
            except Exception as error:
                print("Plotting failed! {0}".format(error))

        print("\n\tGraph plotting has finished, Graphs are found in the Graph folder\n")

        # IncorrectVSredundancy plot
        try:
            PlotRedundancyPlot(self.ABOSSparsed, self.residueErrorRate)
            print("\n\tRedundancy vs flagged residue/positions plotting is finished\n")
        except Exception as error:
            print("Plotting the Redundancy vs flagged residue/positions plot failed {0}".format(error))

    def calculateOutputs(self):
        """
        Numbers of filtered data sequences

        Passes the input dictionary and the first element of self.ABOSSparsed
        to calculate_filtered_reads.
        """
        calculate_filtered_reads(self.final_dict, self.ABOSSparsed[0])
        print("\n\tThe residue error rate has been writen to Error/ig_seq_residue_error_rate.txt")

    def writingOutputs(self):
        """
        Write Output to CSV
        """
        print("\n\tStart writing output to Final_Ig-seq_information.csv\n")
        writeOutputToCSV(self.ABOSSparsed)
        print("\tFinished writing output to Final_Ig-seq_information.csv")

    def cleanUp(self):
        """
        Clean up

        Deletes the Reference_matrix and Temp_CSV directories from the
        current working directory.
        """
        print("\n\tDeleting all temporary files\n")

        for directory in self._TEMP_DIRECTORIES:
            # A directory that is already gone must not stop the other one
            # from being removed.
            if os.path.isdir(directory):
                shutil.rmtree(directory)

        print("\n\t\tDone\n\n")
        
if __name__ == "__main__":
    # Command-line entry point: parse the options, then run every ABOSS stage in order.

    parser = OptionParser(version=VERSION)
    # "resolve" lets the -h/--help option defined below replace the one
    # optparse adds by default
    parser.set_conflict_handler("resolve")
    parser.add_option("-h", "--help", action='help', help="ABOSS help menu")

    parser.add_option("-n", "--ncores", dest="ncores", type="int", default=8,
                      help="Number of cores. Default is 8")

    parser.add_option("-f", "--file",
                      action="store", type="string", dest="filename", help="Specifiy your input file. The acceptable formats are:Igblastn outputs (preferred), amino acid fasta file, dictionary {sequence:redundancy, sequence2:redundancy2} in pickle format. See directory Examples for these file formats")

    parser.add_option("-o", "--format", action="store", type="string", dest="format", help="Specify format:Igblastn,Fasta,Pickle")

    parser.add_option("-c", "--chain",
                      action="store", type="string", dest="chain", default="H", help="Antibody chain [H,L]. Default is H")

    parser.add_option("-r", "--rate",
                      action="store", type="int", dest="analysis_rate", default=1000, help="Rate of analysis for one ANARCI parsing round. Default is 1000")

    parser.add_option("-s", "--species",
                      action="store", type="string", dest="current_species", default="human", help="Species e.g. human, mouse, alpaca, rhesus. Default is human")

    (options, args) = parser.parse_args()

    # The input file is mandatory: without it show the banner and the help
    # text, then stop with a non-zero status.
    if not options.filename:
        print(longVersion)
        parser.print_help()
        sys.exit(1)

    # A missing format is only a warning; openFile() then tries to detect it
    if not options.format:
        print("\nWARNING: File format is not specified! Select following formats Igblastn, Fasta, Pickle. See directory Examples for each format example \n")

    ABOSS = InitiateABOSS(options.filename, options.analysis_rate, options.chain, options.ncores, options.format, options.current_species)

    # We print all the inputs that were passed to ABOSS
    ABOSS.status()

    # We open the input files that the user provided
    ABOSS.openFile()

    # Starting ANARCI parsing
    ABOSS.anarciParsing()

    # Creating IMGT germline reference
    ABOSS.createIMGTRef()

    # Building reference matrix for the antibody repertoire
    ABOSS.referenceMatrix()

    # Flagging residues
    ABOSS.flaggingResiduePosition()

    # Plotting
    ABOSS.plotting()

    # Calculate output Numbers
    ABOSS.calculateOutputs()

    # Writing outputs
    ABOSS.writingOutputs()

    # Clean UP
    ABOSS.cleanUp()
