#!/bin/bash
# This shell script folds all RNA or DNA sequences in a single file
# and creates simple output files.

export _POSIX2_Version=0
export Package=mfold
export Version=3.6

# Abort subroutine
abort(){
    rm -f mfold.log  fort.*
    if [ $# -gt 0 ] ; then
	    echo -e "$1"
    fi
    echo "Job Aborted"
    exit 1
	}

if [ $# = 0 ]; then
  echo -e "Usage is\nmfold SEQ='file_name' with optional parameters:
   [ NA=RNA (default) or DNA ] [ LC=sequence type (default = linear) ]
   [ T=temperature (default = 37) ] [ P=percent (default = 5) ]
   [ NA_CONC=Na+ molar concentration (default = 1.0) ]
   [ MG_CONC=Mg++ molar concentration (default = 0.0) ]
   [ W=window parameter (default - set by sequence length) ]
   [ MAXBP=max base pair distance (default - no limit) ]
   [ MAX=maximum number of foldings to be computed per sequence (default 50) ]"
  exit 2
fi

# Write header
echo "$Package version $Version"

# Set default values.
SEQ=/dev/null
NA=RNA
LC=linear
T=default
P=5
W=-1
MAXBP="no limit"
MAX=50
START=1
STOP=30000

# Process the command line arguments 1 at a time.
COUNT=$#
while [ $COUNT != 0 ]; do
  if [ `echo $1 | cut -d= -f1` = "SEQ" ]; then
    SEQ=`echo $1 | cut -d= -f2`
  elif [ `echo $1 | cut -d= -f1` = "NA" ]; then
    NA=`echo $1 | cut -d= -f2`     
  elif [ `echo $1 | cut -d= -f1` = "LC" ]; then
    LC=`echo $1 | cut -d= -f2`     
  elif [ `echo $1 | cut -d= -f1` = "T" ]; then
    T=`echo $1 | cut -d= -f2`     
  elif [ `echo $1 | cut -d= -f1` = "NA_CONC" ]; then
    NA_CONC=`echo $1 | cut -d= -f2`
  elif [ `echo $1 | cut -d= -f1` = "MG_CONC" ]; then
    MG_CONC=`echo $1 | cut -d= -f2`     
  elif [ `echo $1 | cut -d= -f1` = "P" ]; then
    P=`echo $1 | cut -d= -f2`     
  elif [ `echo $1 | cut -d= -f1` = "W" ]; then
    W=`echo $1 | cut -d= -f2`     
  elif [ `echo $1 | cut -d= -f1` = "MAXBP" ]; then
    MAXBP=`echo $1 | cut -d= -f2`     
  elif [ `echo $1 | cut -d= -f1` = "MAX" ]; then
    MAX=`echo $1 | cut -d= -f2`     
  elif [ `echo $1 | cut -d= -f1` = "START" ]; then
    START=`echo $1 | cut -d= -f2`     
  elif [ `echo $1 | cut -d= -f1` = "STOP" ]; then
    STOP=`echo $1 | cut -d= -f2`     
  else
    echo "Invalid entry: " $1 "on command line."    
    exit
  fi
COUNT=`expr $COUNT - 1`
shift
done
NA_CONC=${NA_CONC:-1.0}
MG_CONC=${MG_CONC:-0.0}

# Check for sequence
if [ ! -s $SEQ ]; then
  echo "Sequence has not been defined or it does not exist."
  exit 1
fi
NUM=`echo $SEQ | tr "." " " | wc | tr -s " " " " | cut -d" " -f3`
SUFFIX=`echo $SEQ | cut -d"." -f$NUM`
if [ $NUM -gt 1 -a $SUFFIX = seq ]; then
  NUM=`expr $NUM - 1`
  FILE_PREFIX=`echo $SEQ | cut -d"." -f1-$NUM`
else
  FILE_PREFIX=$SEQ
fi

LOGFILE=$FILE_PREFIX.log
rm -f $LOGFILE

# Generate energy tables for temperature T
# EXT is extension for energy file names
# T=default means that T was not specified on the command line.
# This yields version 3.0 energies at 37 degrees (.dat) for RNA
E_Version=2.3
if [ $T = default ]; then
   T=37
   if [ $NA = RNA ]; then
      EXT=dat
      E_Version=3.0
   else
      EXT=37
   fi
else
   if [ $T -lt 0 ]; then
      T=0
   elif [ $T -gt 100 ]; then
      T=100
   fi
   EXT=$T
fi

ARG1=`echo $LC | cut -c1`

if [ $EXT = dat ]; then
   echo "Folding at 37 degrees using version 3.0 dat files."
else
newtemp >> $LOGFILE 2>&1 <<EOF || abort "newtemp failed"
$NA
$T
$NA_CONC
$MG_CONC
EOF
   if [ $NA = RNA ]; then
      echo "RNA free energy files (version 2.3) at $T degrees created."
   else
      echo "DNA free energy files (version 3.0) at $T degrees created."
   fi
fi

# Fold the sequences
nafold $ARG1 text >> $LOGFILE 2>&1 << EOF
0
2
$SEQ
$P
$MAX
$WINDOW
asint1x2.$EXT
asint2x3.$EXT
dangle.$EXT
loop.$EXT
miscloop.$EXT
sint2.$EXT
sint4.$EXT
sint6.$EXT
stack.$EXT
tloop.$EXT
triloop.$EXT
tstackh.$EXT
tstacki.$EXT
y
n
$FILE_PREFIX.out
y
$FILE_PREFIX.ct
y
$FILE_PREFIX.det
10
EOF

if [ $E_Version = 2.3 -o $NA = DNA ]; then
   if [ $NA = RNA ]; then
      DH=dh
   else
      DH=dhd
   fi
efn <<EOF > $FILE_PREFIX.dh 2>&1
$LorC
$FILE_PREFIX.ct
n
n
asint1x2.$DH
asint2x3.$DH
dangle.$DH
loop.$DH
miscloop.$DH
sint2.$DH
sint4.$DH
sint6.$DH
stack.$DH
tloop.$DH
triloop.$DH
tstackh.$DH

tstacki.$DH
EOF

  add_dHdSTm $FILE_PREFIX.out $FILE_PREFIX.dh $T text >> $LOGFILE 2>&1 
  mv $FILE_PREFIX.dHdSTm $FILE_PREFIX.out >> $LOGFILE 2>&1 
  add_dHdSTm $FILE_PREFIX.det $FILE_PREFIX.dh $T text >> $LOGFILE 2>&1 
  mv $FILE_PREFIX.dHdSTm $FILE_PREFIX.det >> $LOGFILE 2>&1 

# Add NA type, temperature and salt header to .out and .det file.
  HEADER=`echo -e "$LorC $NA folding at ${T} C. [Na+] = $NA_CONC, [Mg++] = $MG_CONC.\n"`
  echo "$HEADER" > $FILE_PREFIX.temp 
  cat $FILE_PREFIX.out >> $FILE_PREFIX.temp 
  mv $FILE_PREFIX.temp $FILE_PREFIX.out 
  echo "$HEADER" > $FILE_PREFIX.temp 
  cat $FILE_PREFIX.det >> $FILE_PREFIX.temp 
  mv $FILE_PREFIX.temp $FILE_PREFIX.det

fi

# Cleanup
rm -f mfold.log  fort.* *.$EXT

echo "All done."

