#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# iMembrane : search executable
#
# Author: Sebastian Kelm
# Created: 10/05/2010 - rewritten from original Perl executable
#

import sys
import os.path
from glob import glob
#from threading import Thread

if __name__ == "__main__":
  # Make sure we can import stuff
  sys.path.append(os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])), "..", "lib-python"))

from prosci.common import IllegalStateError
from prosci.imembrane.searcher import Searcher
from prosci.shell import Params


params = Params(withargument=("dbdir", "seq", "struc", "inputid", "outdir", "cutoff-tm", "cutoff-rmsd", "cutoff-pid", "cutoff-pcov", "cutoff-pannot", "cutoff-e", "cutoff-e2", "iterations", "seqprofile", "hitlog", "maxhits", "outputparent"),
                     allowed=("dbdir", "seq", "struc", "inputid", "outdir", "cutoff-tm", "cutoff-rmsd", "cutoff-pid", "cutoff-pcov", "cutoff-pannot", "cutoff-e", "cutoff-e2", "iterations", "seqprofile", "hitlog", "maxhits", "help", "verbose", "fast", "exhaustive", "variants", "write-input-sequence", "joy", "transmembrane"))

if not params.opts or params.args or "help" in params.opts:
    print """
iMembrane - database search engine

Input:  A protein sequence or structure
Output: Projected JOY-style annotation for the input protein,
        based on all database hits


USAGE:
"""+params.scriptname+""" --dbdir <DIRECTORY> <INPUT_OPTIONS> [SEARCH_OPTIONS]


ENVIRONMENT SETTINGS:

  --dbdir DIRECTORY
            Specifies the path to the main directory of iMembraneDB.
            (Required.)


INPUT OPTIONS:
  
  You must specify at least one of --seq and --struc.
  
  --seq FILE
            Input sequence, in PIR/JOY format.
            Can also be an alignment between input and database proteins.
            (FILE can be '-', for STDIN.)

  --struc FILE
            Input structure, in PDB format.

  --inputid ID
            ID of the input protein in the file provided by --seq.
            Optional if the seq file contains only one sequence/structure entry.


OUTPUT OPTIONS:

  --outdir DIR
            Directory where to create output sub-directories containing results.
            Defaults to the current working directory.
  
  --hitlog FILENAME  (Default: hits.table)
            File where a table of hits should be written to.
  
  --outputparent MODE  ("copy" OR "link" OR "none"; Default: "copy")
            Should each hit contain a copy of / link to the parent entry in the database?
  
  --verbose
            Print status messages.

  --write-input-sequence
            Write a file called sequence.fasta for each hit, containing the
            input sequence. This is useful if you wish to build a new database
            from the annotated input protein.
            Usually, this option is used in conjunction with --maxhits 1.
  
  --joy
            Run JOY on all results and include JOY annotation in the file named
            query_annotation.tem
            NOTE: Requires the executable "joy" to be in your PATH. Otherwise
                  this option will result in an error.


SEARCH OPTIONS:
  
  By default, iMembraneDB is first searched using PSI-BLAST (3 iterations).
  Only if no hits are found, an exhaustive search is performed. The below
  options modify this behaviour.

  --exhaustive
            Skip BLAST search completely and go straight to the exhaustive
            search.
  
  --fast
            Never perform exhaustive search, even if BLAST search yields no
            hits.
  
  
  The following options control which entries of iMembraneDB are used to
  annotate your input. The default options shown in parentheses.
  
  --transmembrane
            Require all hits to have at least one residue in the tail layer of
            the membrane.
  
  --variants
            By default, iMembrane only considers one instance of each database
            protein as a possible hit. If this option is given, all instances
            (multiple occurences of the same chain, multiple MD simulations) are
            considered.
  
  --maxhits NUMBER
            Output the best NUMBER hits, at most. By default, all hits
            are returned. You can use this to save disk space.

  --cutoff-tm   NUMBER  (default:  0.5)
  --cutoff-rmsd NUMBER  (default: 10.0)
            Cut-off for TM-score and RMSD. Any hits with values worse than the
            provided cut-off are ignored.
            (Only applicable if a structure was provided as input.)
            
  --cutoff-pid    NUMBER  (default: 0.00)
  --cutoff-pcov   NUMBER  (default: 0.50)
  --cutoff-pannot NUMBER  (default: 0.50)
            Cut-off for percentage sequence identity, percentage coverage and
            percentage annotated. Any hits with values worse than the provided
            cut-off are ignored. Cut-off values are bounded [0, 1].
  
  --cutoff-e  NUMBER  (default: 1.0)
  --cutoff-e2 NUMBER  (default: 0.002)
            Controls the cut-off E value for the initial (PSI-)BLAST search.
            e2 refers to the PSI-BLAST E-value, which determines whether a
            sequence is added to the sequence profile.
            (This option has no effect in combination with --exhaustive.)
  
  --iterations NUMBER  (default: 3)
            Controls the number of PSI-BLAST iterations to perform.
            (This option has no effect in combination with --exhaustive.)
  
  --seqprofile SEQFILE
            Perform a PSI-BLAST search, starting with a profile constructed from
            the sequences in SEQFILE (ASCII text file in FASTA format).
            (This option has no effect in combination with --exhaustive.)"""
    sys.exit(1)


verbose = params.isOpt("verbose")

maxhits = params.getOpt("maxhits")
if maxhits:
  maxhits = int(maxhits)

iterations    =   int(params.getOpt("iterations",    3))
cutoff_e      = float(params.getOpt("cutoff-e",      1.0))
cutoff_e2     = float(params.getOpt("cutoff-e2",     0.002))
cutoff_tm     = float(params.getOpt("cutoff-tm",     0.5))
cutoff_rmsd   = float(params.getOpt("cutoff-rmsd",  10.0))
cutoff_pid    = float(params.getOpt("cutoff-pid",    0.00))
cutoff_pcov   = float(params.getOpt("cutoff-pcov",   0.50))
cutoff_pannot = float(params.getOpt("cutoff-pannot", 0.50))

exhaustive=params.isOpt("exhaustive")
fast=params.isOpt("fast")

if params.isOpt("seq"):
  if params.isOpt("exhaustive"):
    sys.stderr.write("Error: Exhaustive search is disabled for sequence input. Executing fast search instead.")
  fast = True
  exhaustive = False

if verbose:
  print "Search options:"
  print "fast\t%d"%(fast)
  print "exhaustive\t%d"%(exhaustive)
  print "e\t%f"%(cutoff_e)
  print "e2\t%f"%(cutoff_e2)
  print "tm\t%f"%(cutoff_tm)
  print "rmsd\t%f"%(cutoff_rmsd)
  print "pid\t%f"%(cutoff_pid)
  print "pcov\t%f"%(cutoff_pcov)
  print "pannot\t%f"%(cutoff_pannot)
  print



searcher = Searcher(params.getOpt("dbdir"), params.getOpt("inputid"), params.getOpt("seq"), params.getOpt("struc"), seqprofile=params.getOpt("seqprofile"), outdir=params.getOpt("outdir"), maxhits=maxhits, hitlog=params.getOpt("hitlog", "hits.table"), verbose=verbose, variants=params.isOpt("variants"), exhaustive=exhaustive, fast=fast, write_input_sequence=params.isOpt("write-input-sequence"), run_joy=params.isOpt("joy"), transmembrane=params.isOpt("transmembrane"), outputparent=params.getOpt("outputparent", "copy"))

#hits = searcher.search(iterations=iterations, cutoff_e=cutoff_e, cutoff_e2=cutoff_e2, cutoff_tm=cutoff_tm, cutoff_rmsd=cutoff_rmsd, cutoff_pid=cutoff_pid, cutoff_pcov=cutoff_pcov, cutoff_pannot=cutoff_pannot)

hits = searcher.search(iterations, cutoff_e, cutoff_e2, cutoff_tm, cutoff_rmsd, cutoff_pid, cutoff_pcov, cutoff_pannot)
