#!/usr/bin/env python
# -*- coding: utf-8 -*-
#

import sys
import os

# Make this script's own directory, and the "lib-python" directory two levels
# above it, importable before the project imports below are attempted.
_script_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
for _extra_path in (_script_dir, os.path.join(_script_dir, "..", "..", "lib-python")):
  sys.path.append(_extra_path)

from prosci.shell import Params
from prosci.loops.freaddb import FreadDB
from prosci.util.getpdb import get_pdb_path



params = Params(withargument=["terminal", "minlen", "maxlen", "chains", "abdb", "pdbmirror"], allowed=["struconly", "indexonly"])

if len(params.args) < 1 or len(params.args) < 2 and not params.isOpt("indexonly") and not (params.isOpt("pdbmirror") and (params.isOpt("chains") or params.isOpt("abdb"))):
  print "USAGE: %s [OPTIONS] <pyfread_db> <pdb_file ...>" % (params.scriptname)
  print """
    <pyfread_db>    Root directory of the PyFREAD database
    <pdb_file ...>  One or more PDB files to be added to the database
  
  OPTIONS:
      --minlen NUM      Minimum loop length (default: 3)
      --maxlen NUM      Maximum loop length (default: 30)
      --terminal NUM    Ignore the first and last NUM residues (default: 5).

      --chains FILE     Select only certain chains in each PDB structure to
                        add to the database. All chains will be used to
                        determine inter-chain contacts.
                        A line in FILE should have any of the following formats:
                        /path/to/my/structure1.pdb XYZ
                        structure1.pdb XYZ
                        1abc XYZ
                        1abcXYZ

      --abdb FILE       Provide an ABDB summary file, with which to construct
                        the chain selection filter.

      --pdbmirror DIR   Grab structures given in --chains or --abdb from DIR.

      --struconly       Only add structure files, don't index them.
      --indexonly       Only index existing structure files, don't add new ones.
  """
  sys.exit(1)


# Pull the tuning options off the command line, falling back to the defaults
# advertised in the usage text.
min_loop_length = int(params.getOpt("minlen", 3))
max_loop_length = int(params.getOpt("maxlen", 30))
terminal_cutoff = int(params.getOpt("terminal", 5)) # Skip this many residues at the N and C termini
chains_file = params.getOpt("chains", None)
abdb_file = params.getOpt("abdb", None)
pdbmirror_dir = params.getOpt("pdbmirror", None)

# Validate user input explicitly: an `assert` would be stripped when Python
# runs with -O, letting an invalid cutoff through silently.
if terminal_cutoff < 1:
  sys.stderr.write("ERROR: --terminal must be at least 1 (got %d)\n" % terminal_cutoff)
  sys.exit(1)

# First positional argument is the database root; the rest are structure files.
dbdir = params.args[0]
structure_files = params.args[1:]

if not params.isOpt("indexonly"):
  chain_map = {}
  if pdbmirror_dir:
    added_pdbs=set([])
  
  if chains_file:
    f = open(chains_file)
    try:
      for line in f:
        
        fields= line.split(None, 1)
        if len(fields) == 1:
          fname = fields[0][:4].lower()
          chains = fields[0][4:]
        else:
          fname, chains = fields
        
        if pdbmirror_dir:
          if fname not in added_pdbs:
            added_pdbs.add(fname)
            fname = get_pdb_path(fname, pdbmirror_dir)
            if os.path.exists(fname):
              structure_files.append(fname)
            else:
              print "Structure not found in PDB mirror directory:", fname
        
        if fname in chain_map:
          chain_map[fname] += chains
        else:
          chain_map[fname] = chains
    finally:
      f.close()
  
  if abdb_file:
    f = open(abdb_file)
    try:
      next(f)
      for line in f:
        try:
          fields = line.split("\t")
          fname, hchain, lchain = fields[:3]
          resolution = fields[13]
          r_free = fields[15]
          r_factor = fields[16]
        except:
          print line
          raise
        if pdbmirror_dir:
          try:
            good_quality = float(resolution) <= 3 and float(r_factor) <= 0.3
          except ValueError:
            good_quality = False
            print "Couldn't parse resolution or r_factor:"
            print line
            print ""
          if good_quality and fname not in added_pdbs:
            added_pdbs.add(fname)
            fname = get_pdb_path(fname, pdbmirror_dir)
            if os.path.exists(fname):
              structure_files.append(fname)
            else:
              print "Structure not found in PDB mirror directory:", fname
        newchains = ""
        if hchain != "NA":
          newchains += hchain
        if lchain != "NA":
          newchains += lchain
        
        if fname in chain_map:
          chain_map[fname] += newchains
        else:
          chain_map[fname] = newchains
    finally:
      f.close()
  

# Create the FreadDB handle for the database root directory and configure it
# with this run's settings.
freaddb = FreadDB(dbdir)
freaddb.verbose = True
freaddb.terminal_cutoff = terminal_cutoff
# NOTE(review): presumably writes the options set above into the database so
# later runs see them — confirm against FreadDB.save_db_options.
freaddb.save_db_options()

if not params.isOpt("indexonly"):
  structure_files.sort()
  for istructure, fname in enumerate(structure_files):
    chains_to_add = None
    if chain_map:
      names = [fname, os.path.basename(fname)]
      names.append(os.path.splitext(names[-1])[0])
      if names[-1].startswith("pdb"):
        names.append(names[-1][3:])
      for n in names:
        chains_to_add = chain_map.get(n, None)
        if chains_to_add:
          break
      if not chains_to_add:
        print "No chain selection found for PDB file. Skipping:", fname
        continue
      else:
        chains_to_add = "".join(sorted(set(chains_to_add)))
    print "Adding : %d/%d : %s" % (istructure+1, len(structure_files), fname)
    freaddb.add_structure(fname, chain_codes=chains_to_add)

# Build one index per loop length in the inclusive range [minlen, maxlen],
# unless the user asked for structures only.
build_indexes = not params.isOpt("struconly")
if build_indexes:
  loop_lengths = range(min_loop_length, max_loop_length + 1)
  for loop_length in loop_lengths:
    freaddb.build_index(loop_length)
