Skip to content

Commit

Permalink
Add simple BLAST script that does not require AMPlify
Browse files Browse the repository at this point in the history
  • Loading branch information
dy-lin committed Jun 10, 2022
1 parent cc5817e commit 4fc6371
Showing 1 changed file with 191 additions and 0 deletions.
191 changes: 191 additions & 0 deletions scripts/blast_simple.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
#!/usr/bin/env bash
set -uo pipefail

FULL_PROGRAM=$0
PROGRAM=$(basename $FULL_PROGRAM)

if [[ "$PROGRAM" == "slurm_script" ]]; then
FULL_PROGRAM=$(scontrol show job $SLURM_JOBID | awk '/Command=/ {print $1}' | awk -F "=" '{print $2}')
PROGRAM=$(basename ${FULL_PROGRAM})
fi
args="$FULL_PROGRAM $*"

# 0 - table function
function table() {
if column -L <(echo) &>/dev/null; then
cat | column -s $'\t' -t -L
else
cat | column -s $'\t' -t echo
fi
}

# 1 - get_help function
function get_help() {
{
echo -e "PROGRAM: $PROGRAM\n"
echo "DESCRIPTION:"
echo -e "\
\tCharacterizes the novelty of AMPs using BLASTp. Does not join with AMPlify results.\n \
\n \
\tOUTPUT:\n \
\t-------\n \
\t - amps.blast.summary.novel.final.tsv\n \
\t - amps.blast.summary.known.final.tsv\n \
\n \
\tEXIT CODES:\n \
\t-----------\n \
\t - 0: successfully completed\n \
\t - 1: general errors\n \
\n \
" | table

echo "USAGE(S):"
echo -e "\
\t$PROGRAM [-o <outdir>] [-t <int>] -d <preformatted BLAST nr database> <AMP FASTA file>\n \
" | table

echo "OPTION(S):"
echo -e "\
\t-b <BLAST executable>\tPath to BLAST executable if not in PATH\n \
\t-d <database>\tPre-fromatted BLAST nr database\t(e.g. /path/to/nr)\n \
\t-h\tShow this help menu\n \
\t-o <directory>\tOutput directory\n \
\t-t <int>\tNumber of threads\t(default = 8)\n \
" | table
} 1>&2
exit 1
}

# 1.5 - print_line function
function print_line() {
if command -v tput &>/dev/null; then
end=$(tput cols)
else
end=50
fi
{
printf '%.0s=' $(seq 1 $end)
echo
} 1>&2
}

# 2 - print_error function
function print_error() {
{
echo -e "CALL: $args (wd: $(pwd))\n"
message="$1"
echo "ERROR: $message"
print_line
get_help
} 1>&2
}
# 3 - no arguments given
if [[ "$#" -eq 0 ]]; then
get_help
fi


# 4 - default parameters
outdir=$(pwd)
num_threads=8

if ! command -v blastp &>/dev/null; then
RUN_BLASTP=""
else
RUN_BLASTP=$(command -v blastp)
fi

while getopts :d:ho:t:b: opt; do
case $opt in
b) RUN_BLASTP=$(realpath $OPTARG);;
d) database=$(realpath $OPTARG);;
h) get_help ;;
o) outdir=$(realpath $OPTARG) ;;
t) num_threads=$OPTARG ;;
\?) print_error "Invalid option: -$OPTARG" ;;
esac
done

shift $((OPTIND-1))

# 5 - wrong arguments given
if [[ "$#" -ne 1 ]]; then
print_error "Incorrect number of arguments."
fi

# 6 - check inputs


if [[ ! -f $(realpath $1) ]]; then
print_error "Input file $(realpath $1) does not exist."
elif [[ ! -s $(realpath $1) ]]; then
print_error "Input file $(realpath $1) is empty."
fi

{
echo "HOSTNAME: $(hostname)"
echo -e "START: $(date)\n"

echo -e "PATH=$PATH\n"
echo "CALL: $args (wd: $(pwd))"
echo "THREADS: $num_threads"
echo
} 1>&2

mkdir -p $outdir

infile=$(realpath $1)
# NOW:
# database=/projects/btl/db/blast-20220119/nr

# sort the files into those <= 30 and those > 30
$RUN_BLASTP -version 2>&1 || exit 1

# blast it against NR database
seqtk comp $infile > $outdir/seqtk.comp.txt

seqtk subseq $infile <(awk '{if($2>30) print $1}' $outdir/seqtk.comp.txt) > $outdir/amps.gt30.faa
seqtk subseq $infile <(awk '{if($2<=30) print $1}' $outdir/seqtk.comp.txt) > $outdir/amps.short.faa

# blast it against NR database
echo "Running BLASTp..." 1>&2
$RUN_BLASTP -db $database -query $outdir/amps.gt30.faa -out $outdir/amps.gt30.tsv -task blastp -outfmt '6 qaccver saccver stitle pident length mismatch gapopen qstart qend sstart send evalue bitscore qcovs' -num_threads $num_threads &> $outdir/blastp.log &
$RUN_BLASTP -db $database -query $outdir/amps.short.faa -out $outdir/amps.short.tsv -task blastp-short -outfmt '6 qaccver saccver stitle pident length mismatch gapopen qstart qend sstart send evalue bitscore qcovs' -num_threads $num_threads &> $outdir/blastp-short.log &

wait
# add headers to combined blast

echo "Finished running BLASTP." 1>&2
echo "qaccver saccver stitle pident length mismatch gapopen qstart qend sstart send evalue bitscore qcovs" | sed 's/ /\t/g' > $outdir/amps.blast.tsv
cat $outdir/amps.gt30.tsv $outdir/amps.short.tsv >> $outdir/amps.blast.tsv

echo "Known AMPs (100% pident and 100% qcovs) written to $outdir/known.txt and $outdir/known.faa" 1>&2
awk -F "\t" '{if($4==100 && $NF==100) print $1}' $outdir/amps.blast.tsv | sort -u > $outdir/known.txt

echo "Novel AMPs written to $outdir/novel.txt and $outdir/novel.faa" 1>&2
grep -vwf $outdir/known.txt <(awk '{print $1}' $outdir/seqtk.comp.txt) > $outdir/novel.txt

seqtk subseq $infile $outdir/known.txt > $outdir/known.faa
seqtk subseq $infile $outdir/novel.txt > $outdir/novel.faa

echo "qaccver saccver stitle pident qcovs" | sed 's/ /\t/g' > $outdir/amps.blast.novel.tsv
grep -wf $outdir/novel.txt $outdir/amps.blast.tsv | cut -f1-4,14 -d $'\t'>> $outdir/amps.blast.novel.tsv
echo "qaccver saccver stitle pident qcovs" | sed 's/ /\t/g' > $outdir/amps.blast.known.tsv
grep -wf $outdir/known.txt $outdir/amps.blast.tsv | cut -f1-4,14 -d$'\t' >> $outdir/amps.blast.known.tsv

# take top one for each JIRA ticket summary, and AMPlify results combination
echo "qaccver saccver stitle pident qcovs" | sed 's/ /\t/g' > $outdir/amps.blast.summary.novel.tsv
while read line; do
grep -w -m1 "$line" $outdir/amps.blast.tsv | cut -f1-4,14 -d $'\t' >> $outdir/amps.blast.summary.novel.tsv
done < $outdir/novel.txt
#
for i in $(grep -vwf <(awk '{print $1}' $outdir/amps.blast.novel.tsv) $outdir/novel.txt); do
echo -e "$i\tNA\tNA\tNA\tNA" >> $outdir/amps.blast.summary.novel.tsv
done

seqtk subseq $infile <(grep -vwf <(awk '{print $1}' $outdir/amps.blast.novel.tsv) $outdir/novel.txt) > $outdir/no_hits.faa

echo "qaccver saccver stitle pident qcovs" | sed 's/ /\t/g' > $outdir/amps.blast.summary.known.tsv
while read line; do
grep -w -m1 "$line" $outdir/amps.blast.tsv | cut -f1-4,14 -d $'\t' >> $outdir/amps.blast.summary.known.tsv
done < $outdir/known.txt

0 comments on commit 4fc6371

Please sign in to comment.