#!/bin/bash
#
# MEDELLER pipeline
# Coded by Sebastian Kelm
#          JP (Slight modifications)
# Created on 19 September 2012
#
# Copyright 2008-2012 Sebastian Kelm <kelm@stats.ox.ac.uk>
#
#



################################################################################
# General use functions
################################################################################

function abspath {
	if [[ -d "$1" ]]; then
		pushd "$1" >/dev/null
		pwd
		popd >/dev/null
	elif [[ -e "$1" ]]; then
		pushd $(dirname "$1") >/dev/null
		echo $(pwd)/$(basename "$1")
		popd >/dev/null
	else
		echo "Error: '$1' does not exist!" 1>&2
		return 127
	fi
}



################################################################################
# Set up environment variables
################################################################################

MAINDIR=`abspath "$0"`
MAINDIR=`dirname "$MAINDIR"`
MAINDIR=`dirname "$MAINDIR"`

BINPATH="$MAINDIR/bin"
LIBPATH="$MAINDIR/lib-python"
SUBSTTABLEPATH="$MAINDIR/substtables"
BLASTDATAPATH="$MAINDIR/blastdata"
MPTPATH=`abspath "$MAINDIR/../MPT/bin"`

export PYTHONPATH="$LIBPATH:$PYTHONPATH"
export PATH="$BINPATH:$MPTPATH:$PATH"



################################################################################
# Parse command line options
################################################################################

if ! [ "$5" ]; then
  myname=`basename "$0"`
  echo "
  USAGE:
    $myname <target_filename> <template_filename> <template_chain> <blastdb> <outdir> [threads [medeller_flags [imembrane_flags]]]
    " 1>&2
  exit 1
fi


targetfile=`abspath "$1"`
templatefile=`abspath "$2"`
templatechain="$3"
blastdb=`abspath "$4"` # /data/pegasus/hill/BLAST/ncbi-blast-2.2.25+/bin/db/uniref90.fasta
outdir="$5"
threads="$6"
medeller_opts="$7"
imembrane_opts="$8"

templateid="template"


if [ ${#templatechain} != 1 ] && [ ${#templatechain} != 0 ]; then
  echo "Error: template_chain must be a single character, or '' in the case of a single unnamed chain. You specified: '$templatechain'" 1>&2
  exit 1
fi


if ! [ "$threads" ]; then
  threads=1
fi


if ! [ -f "$outdir/ignoreexisting" ]; then

  #if [ -f "$outdir/input/$templateid$templatechain.pdb" ]; then
  #  echo "Error: Pre-processed template structure exists. Delete any output files or specify a different output directory." 1>&2
  #  exit 99
  #fi



  if [ -s "$outdir/imembrane/tophit/query_annotation.tem" ]; then
    echo "Error: iMembrane annotation of template exists. Delete any output files or specify a different output directory." 1>&2
    exit 99
  fi



  if [ -s "$outdir/MPT/MPT_output.fasta" ]; then
    echo "Error: target-template alignment exists. Delete any output files or specify a different output directory." 1>&2
    exit 99
  fi



  if [ -s "$outdir/annotated_alignment.tem" ]; then
    echo "Error: annotated alignment exists. Delete any output files or specify a different output directory." 1>&2
    exit 99
  fi



  if [ -s "$outdir/medeller/annotated_core/query_annotation.tem" ]; then
    echo "Error: Annotated MEDELLER models exist. Delete any output files or specify a different output directory." 1>&2
    exit 99
  fi



  if [ -s "$outdir/medeller/model.core.pdb" ]; then
    echo "Error: MEDELLER models exist. Delete any output files or specify a different output directory." 1>&2
    exit 99
  fi

fi



################################################################################
# Perform basic sanity checks on the target and template files:
#     - missing or empty files?
#     - more than one sequence present in target file?
#     - target contains non-standard amino acids?
#     - extract target sequence's FASTA title
################################################################################

if ! [ -s "$targetfile" ]; then
  echo "Error: Target file empty or not found: $targetfile" 1>&2
  exit 1
fi

if ! [ -s "$templatefile" ]; then
  echo "Error: Template file empty or not found: $templatefile" 1>&2
  exit 1
fi

numtargetseqs=`grep -c '^>' "$targetfile"`
if [ "$numtargetseqs" != 1 ]; then
  echo "Error: Target file contains $numtargetseqs sequences. Must contain 1 sequence: $targetfile" 1>&2
  exit 2
fi

specialcharacters=$(grep -v '^>' "$targetfile" | grep -Po '[^ACDEFGHIKLMNPQRSTVWY\s]' | xargs echo | tr -d ' ')
if [ "$specialcharacters" ]; then
  echo "Error: Target file contains ${#specialcharacters} non-standard amino acids: '$specialcharacters'. Please substitute them with standard amino acids or cut down your target sequence. Path to target file: $targetfile" 1>&2
  exit 2
fi


mkdir -p "$outdir"
outdir=`abspath "$outdir"`

# Process the name in the target fasta file with some "clever" sed
sed 's/^>.*$/>target/g' "$targetfile" >"$outdir/target.fasta"
targetfile="$outdir/target.fasta"

targetid=`grep -m1 '^>' "$targetfile" | tr -d ' \t\n\r'`
targetid=${targetid:1}
if ! [ "$targetid" ]; then
  echo "Error: Target file does not have a FASTA-style heading: $targetfile" 1>&2
  exit 2
fi
#echo "Extracted targetid from FASTA file: '$targetid'"



################################################################################
# Perform basic sanity checks on target and template sequences:
#     - 100% identical?
#     - length1 / length2 < 2/3 or > 3/2 ?
#     - template contains non-standard amino acids?
################################################################################

success=0
mkdir -p "$outdir/input"
prefix="$outdir/input/$templateid"

#if [ -f "$prefix$templatechain.pdb" ]; then
#  echo "Error: Pre-processed template structure exists. Delete any output files or specify a different output directory." 1>&2
#  exit 99
#fi

cp -f "$templatefile" "$prefix.pdb"
filelist=`splitchain -pfF --prefix "$prefix" --seqtitle "$templateid" "$prefix.pdb"`
if ! [ -f "$prefix$templatechain.fasta" ]; then
  echo "Error: Failed to extract sequence of chain '$templatechain' from template PDB file. Check that it is a valid PDB file and contains the specified chain: $templatefile" 1>&2
  exit 2
fi
if ! [ -f "$prefix$templatechain.pdb" ]; then
  echo "Error: Failed to extract structure of chain '$templatechain' from template PDB file. Check that it is a valid PDB file and contains the spefified chain: $templatefile" 1>&2
  exit 2
fi
templateid="$templateid$templatechain"
templatefile="$prefix$templatechain.pdb"


specialcharacters=$(grep -v '^>' "$prefix$templatechain.fasta" | grep -Po '[^ACDEFGHIKLMNPQRSTVWY\s]' | xargs echo | tr -d ' ')
if [ "$specialcharacters" ]; then
  echo "Error: Template file contains ${#specialcharacters} non-standard amino acids: '$specialcharacters'. Please substitute them with standard amino acids or cut down your target structure. Path to template file: $templatefile" 1>&2
  exit 2
fi


seq_comparison=(`fasta2ali "$targetfile" "$prefix$templatechain.fasta" | pid -b -`)
identity=${seq_comparison[0]}
length1=${seq_comparison[3]}
length2=${seq_comparison[4]}

if (( $length1 < 40 )); then
  echo "Error: Target sequence must be at least 40 residues long" 1>&2
  exit 2
fi
if (( $length2 < 40 )); then
  echo "Error: Template structure must be at least 40 residues long" 1>&2
  exit 2
fi

if ! [ "$length2" ]; then
  echo "Error: Something went wrong when trying to assess target-template similarity" 1>&2
  exit 2
fi

echo "Extracted chain '$templatechain' from input file: $templateid"
echo "Estimated target-template percent identity: $identity"
echo "Sequence lengths: target=$length1, template=$length2"
echo

if [ "$identity" == "100.00" ]; then
  mkdir -p "$outdir/medeller"
  cp -f "$templatefile" "$outdir/medeller/model.core.pdb"
  cp -f "$outdir/medeller/model.core.pdb" "$outdir/medeller/model.hiacc.pdb"
  cp -f "$outdir/medeller/model.core.pdb" "$outdir/medeller/model.hicov.pdb"
  cp -f "$outdir/medeller/model.core.pdb" "$outdir/medeller/model.complete.pdb"
  
  echo "Target and template are identical. Copying template structure in its entirety and skipping modelling process."
  echo
  echo "Models can be found in $outdir/medeller:"
  pushd "$outdir/medeller" >/dev/null
  ls model.*.pdb
  popd >/dev/null
  
  echo "--- The End ---"
  exit
fi


length_difference=`perl -e "if ($length1 > $length2) {print $length1 / $length2} else {print $length2 / $length1};"`
success=`perl -e "if ($length_difference >= 1.5) {print 1;} else {print 0;}"`

if [ $success -ne 0 ]; then
  echo "Error: Lengths of target ($length1) and template ($length2) sequences differ by more than 50% of the shorter sequence (length of longer seq is $length_difference * length of shorter seq). Please consider choosing a different template or trimming the longer protein." 1>&2
  exit 3
fi



if ! [ -s "$outdir/imembrane/tophit/query_annotation.tem" ]; then
  
  ################################################################################
  # Annotate template structure with iMembrane (and JOY)
  ################################################################################
  
  imembrane --verbose --transmembrane --maxhits 1 --joy --inputid "$templateid" --struc "$templatefile" --outdir "$outdir/imembrane" $imembrane_opts


  success=$?
  if (( $success != 0 )); then
    echo "Error: iMembrane did not finish running. Cannot annotate membrane insertion, which is required for model building." 1>&2
    exit 5
  fi

  if ! [ -s "$outdir/imembrane/hits.table" ]; then 
    echo "Error: No iMembrane hits found for template structure. Cannot annotate membrane insertion, which is required for model building." 1>&2
    exit 6
  fi

  if ! [ -s "$outdir/imembrane/tophit/query_annotation.tem" ]; then
    echo "Error: No iMembrane hits found for template structure. Cannot annotate membrane insertion, which is required for model building." 1>&2
    exit 6
  fi

  ln -sf "imembrane/tophit/query_annotation.tem" "$outdir/$templateid.tem"
  #ln -sf "imembrane/tophit/layer.atm" "$outdir/$templateid.pdb"
  ln -sf "$templatefile" "$outdir/$templateid.pdb"

#else
#  #echo "iMembrane annotation of template exists. Skipping."
#  echo "Error: iMembrane annotation of template exists. Delete any output files or specify a different output directory." 1>&2
#  exit 99
fi



if ! [ -s "$outdir/MPT/MPT_output.fasta" ]; then
  
  ################################################################################
  # Align target sequence to annotated template structure
  ################################################################################

  mkdir -p "$outdir/MPT"
  ln -sf "$outdir/$templateid.tem" "$outdir/MPT/structure.tem"
  cp -f "$targetfile" "$outdir/MPT/sequence.fasta"
  MPTpipeline "$outdir/MPT" "$blastdb" "$threads"
  if ! [ -s "$outdir/MPT/MPT_output.fasta" ]; then
    echo "Error: MP-T failed to align target sequence and template structure." 1>&2
    exit 7
  fi

  # TODO: Make sure that MP-T did not remove either input sequence from its input
  #       Maybe this should be done inside MPTpipeline

#else
#  #echo "Target-template alignment (generated by MPT) exists. Skipping."
#  echo "Error: target-template alignment exists. Delete any output files or specify a different output directory." 1>&2
#  exit 99
fi



if ! [ -s "$outdir/annotated_alignment.tem" ]; then

  ################################################################################
  # Merge alignment with template annotation
  ################################################################################
  
  temcat "$outdir/MPT/MPT_output.fasta" "$outdir/$templateid.tem" > "$outdir/annotated_alignment.tem"

  success=$?
  if (( $success != 0 )); then
    echo "Error: Failed to merge template annotation with input alignment.  This may be a bug. Please contact the sys-admin or the author." 1>&2
    exit 7
  fi

#else
#  #echo "iMembrane-annotated target-template alignment exists. Skipping."
#  echo "Error: annotated alignment exists. Delete any output files or specify a different output directory." 1>&2
#  exit 99
fi



if ! [ -s "$outdir/medeller/annotated_core/query_annotation.tem" ]; then
  
  if ! [ -s "$outdir/medeller/model.core.pdb" ]; then
    ################################################################################
    # Model the 3D structure of the target, based on the annotated alignment
    ################################################################################

    medeller $medeller_opts --verbose --prefix "$outdir/medeller/model" --substtables "$SUBSTTABLEPATH" -a "$outdir/annotated_alignment.tem" -i "$targetid" --pairs "$templateid" "$outdir/$templateid.pdb"

    success=$?
#  else
#    #echo "MEDELLER models exist, but not yet annotated..."
#    echo "Error: MEDELLER models exist. Delete any output files or specify a different output directory." 1>&2
#    exit 99
  fi
  
  ################################################################################
  # Annotate the MEDELLER model(s) using the iMembrane annotation of the template
  ################################################################################

  pushd "$outdir/medeller" >/dev/null
  a=(model.{core,hiacc,hicov,complete}.pdb)
  popd >/dev/null
  for model in ${a[@]}; do
    if [ -f "$model" ]; then
      modeltype=`echo "$model" | cut -d. -f2`
      if ! [ -d "$outdir/medeller/annotated_$modeltype" ]; then
        imem_project --dbdir "$outdir/imembrane" --insubdir "tophit" --dbid "$templateid" --inputid "$targetid" --struc "$outdir/medeller/$model" --outsubdir "$outdir/medeller/annotated_$modeltype" --joy
      fi
    fi
  done

  if (( $success != 0 )); then
    echo "Error: MEDELLER did not finish running." 1>&2
    exit 8
  fi

#else
#  #echo "Annotated MEDELLER models exist. Skipping."
#  echo "Error: Annotated MEDELLER models exist. Delete any output files or specify a different output directory." 1>&2
#  exit 99
fi

echo
echo "Models can be found in $outdir/medeller:"
pushd "$outdir/medeller" >/dev/null
ls model.*.pdb
popd >/dev/null
echo "--- The End ---"
