#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys, os

if __name__ == "__main__":
  # When run as a script, extend the module search path with the "lib-python"
  # directory that sits next to the script's own directory (i.e.
  # <script dir>/../lib-python) -- presumably where the prosci package lives.
  _script_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
  _lib_dir = os.path.join(_script_dir, "..", "lib-python")
  sys.path.append(_lib_dir)

from prosci.util.ali import Ali
from prosci.util.seq import pid


# The pid() function takes two aligned sequences plus a mode name and, as used
# below, gives back five values:
#
# fraction identity, fraction coverage, reference number of residues (of which
# the first two values are fractions), length of sequence 1, length of sequence 2
#
# The value used as the reference number varies between modes.
#



if __name__ == "__main__":
  # Command-line driver: read an ALI file (or stdin when the argument is "-"),
  # compare the master sequences of its first two entries and print a single
  # tab-separated line: % identity, % coverage, reference count, length of
  # sequence 1, length of sequence 2.
  import prosci.shell

  params = prosci.shell.Params(withargument=(), allowed=('s', 'l', 'f', 'g', 'a', 'b'))

  # Command-line flag -> mode name handed to pid().  Order matters: if several
  # flags are given, the first match in this table wins.
  PID_MODES = (
    ('s', "shorter"),
    ('l', "longer"),
    ('f', "first"),
    ('g', "second"),
    ('a', "aligned"),
    ('b', "all"),
  )

  def help_msg():
    """Write the usage text to stdout and terminate with exit status 1."""
    sys.stdout.write("USAGE: %s OPTIONS ali_file\n" % (params.scriptname))
    sys.stdout.write("\n")
    sys.stdout.write("OPTIONS:\n")
    sys.stdout.write("    -s    Use shorter sequence as reference.\n")
    sys.stdout.write("    -l    Use longer  sequence as reference.\n")
    sys.stdout.write("    -f    Use first   sequence as reference.\n")
    sys.stdout.write("    -g    Use second  sequence as reference.\n")
    sys.stdout.write("    -a    Use number of aligned residues as reference.\n")
    sys.stdout.write("    -b    Use length of gapped alignment as reference.\n")
    sys.stdout.write("\n")
    sys.stdout.write("Calculates the %ID for the first two sequence/structure entries in the given ALI file.\n")
    sys.stdout.write("Values returned are (on one line, tab-separated):\n")
    sys.stdout.write("\t% Identity\n")
    sys.stdout.write("\t% Coverage\n")
    sys.stdout.write("\tReference number (absolute identity equivalent to 100%)\n")
    sys.stdout.write("\tLength of sequence 1\n")
    sys.stdout.write("\tLength of sequence 2\n")
    sys.stdout.write("\n")
    sys.exit(1)

  # An input file (or "-") is mandatory.
  if len(params.args) < 1:
    help_msg()

  # Pick the reference mode from the first matching flag.
  pid_mode = None
  for flag, mode in PID_MODES:
    if flag in params.opts:
      pid_mode = mode
      break
  if pid_mode is None:
    sys.stderr.write("You must specify one of the OPTIONS (see below).\n\n")
    help_msg()

  # "-" means read the alignment from standard input.
  if params.args[0] == "-":
    f = sys.stdin
  else:
    # open() instead of the Python-2-only file() builtin.
    f = open(params.args[0])

  try:
    entries = Ali(f)
    # Sequences of unequal length cannot already be aligned; align them first.
    if len(entries[0].master.seq) != len(entries[1].master.seq):
      entries.align()
    #seqs      = entries.get_entries_struc() + entries.get_entries_seq()
    seqs = [eg.master.seq for eg in entries]
    # Distinct result names so the imported pid() function is not rebound.
    identity, coverage, nco, l1, l2 = pid(seqs[0], seqs[1], pid_mode)
  finally:
    # Close the file we opened ourselves; leave stdin alone.
    if f is not sys.stdin:
      f.close()

  # pid() yields fractions; scale the first two to percentages for output.
  sys.stdout.write("%.2f\t%.2f\t%d\t%d\t%d\n" % (identity*100.0, coverage*100.0, nco, l1, l2))
