forked from jrjhealey/bioinfo-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ACTgenerator.sh
executable file
·150 lines (124 loc) · 4.95 KB
/
ACTgenerator.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/bin/bash
# A small script to generate artemis comparison files (nucleic acid comparison)
# since all the webservers are apparently defunct!
set -eo pipefail

# Terminal styling used by the logging helpers and the usage text.
# Every style variable starts out as the empty string, so messages fall back
# to plain text whenever STDOUT is not a terminal or the terminal reports
# fewer than 8 colours.
bold='' underline='' rmunderline='' standout=''
black='' red='' green='' yellow='' blue='' magenta='' cyan='' white=''
default=''

if [ -t 1 ] ; then
  # tput exits non-zero when TERM is unset or unknown; "|| true" keeps such a
  # failure from aborting the whole script under "set -e".
  ncols=$(tput colors 2>/dev/null || true)
  # Compare numerically only once the answer is known to be a plain integer
  # (tput may print nothing, or -1 for terminals without colour support).
  if [[ "$ncols" =~ ^[0-9]+$ ]] && [ "$ncols" -ge 8 ] ; then
    bold="$(tput bold 2>/dev/null || true)"
    underline="$(tput smul 2>/dev/null || true)"
    rmunderline="$(tput rmul 2>/dev/null || true)"
    standout="$(tput smso 2>/dev/null || true)"
    black="$(tput setaf 0 2>/dev/null || true)"
    red="$(tput setaf 1 2>/dev/null || true)"
    green="$(tput setaf 2 2>/dev/null || true)"
    yellow="$(tput setaf 3 2>/dev/null || true)"
    blue="$(tput setaf 4 2>/dev/null || true)"
    magenta="$(tput setaf 5 2>/dev/null || true)"
    cyan="$(tput setaf 6 2>/dev/null || true)"
    white="$(tput setaf 7 2>/dev/null || true)"
    default="$(tput sgr0 2>/dev/null || true)"
  fi
fi
log(){
  # Informational message: written to STDOUT in white with an underlined
  # "INFO:" tag. The text is the first argument; when that is missing or
  # empty, everything available on STDIN is used instead.
  local message
  if [[ -n "$1" ]] ; then
    message="$1"
  else
    message="$(</dev/stdin)" || true
  fi
  local tag="${white}${underline}INFO:${rmunderline}"
  echo -e "${tag} ${message}${default}"
}
err(){
  # Error message: written to STDERR in red with an underlined "ERROR:" tag.
  # The text is the first argument; when that is missing or empty,
  # everything available on STDIN is used instead.
  local message
  if [[ -n "$1" ]] ; then
    message="$1"
  else
    message="$(</dev/stdin)" || true
  fi
  local tag="${red}${underline}ERROR:${rmunderline}"
  echo -e "${tag} ${message}${default}" >&2
}
warn(){
  # Warning message: written to STDOUT in yellow with an underlined
  # "WARNING:" tag. The text is the first argument; when that is missing or
  # empty, everything available on STDIN is used instead.
  local message
  if [[ -n "$1" ]] ; then
    message="$1"
  else
    message="$(</dev/stdin)" || true
  fi
  local tag="${yellow}${underline}WARNING:${rmunderline}"
  echo -e "${tag} ${message}${default}"
}
usage(){
  # Print the help text to STDERR.
  # The here-document delimiter is deliberately unquoted so that $0 and the
  # colour variables are expanded at the time the text is printed.
  # The option list mirrors what the argument parser accepts: the long form
  # of -r is --reference, and -t always takes a value.
  cat << EOF >&2
usage: $0 options
This script generates the necessary BLAST comparison file
for use with the Artemis Comparison tool, when comparing 2 genomes.
A typical invocation might look like:
$ bash ACTgenerator.sh -r reference.fasta -q query.fasta -d databasename -o outputdir -t True
OPTIONS:
-h | --help Show this message.
-r | --reference Reference fasta sequence.
-q | --query Query fasta sequence.
-d | --database Name of the BLAST database comparison file.
-o | --outdir Directory to output all the files to.
-t | --tidy Make the script tidy up after itself (T/F).
${red}(Default True.)${default}
This script will take 2 input sequences (in FASTA at present) and creates the BLAST
comparison file that is used by the ARTEMIS Comparison tool (ACT). At the moment, it
only performs a 1-vs-1 comparison, but may be expanded in the future.
By default, the script requires only input query and reference sequences, with all
other options taking on default values. The script will also remove any intermediate
files by default (that includes if not specified). To retain these files provide the -t
flag with a "False" argument (case insensitive).
EOF
}
# Tolerate long arguments: rewrite every recognised long option to its short
# equivalent so that the getopts loop can parse it.
# The loop walks a snapshot of the original arguments. Each pass shifts the
# oldest argument off the front and appends its (possibly translated) form to
# the back, so once the loop ends "$@" holds all arguments in original order.
# Both --ref (the spelling shown in the help text) and --reference are
# accepted for -r.
for arg in "$@"; do
  shift
  case "$arg" in
    --help)            set -- "$@" -h ;;
    --ref|--reference) set -- "$@" -r ;;
    --query)           set -- "$@" -q ;;
    --database)        set -- "$@" -d ;;
    --outdir)          set -- "$@" -o ;;
    --tidy)            set -- "$@" -t ;;
    *)                 set -- "$@" "$arg" ;;
  esac
done
# Walk the (already short-form) options and store each value in its variable.
# -h prints the help text and stops; every other recognised flag takes one
# argument, delivered by getopts in OPTARG. Anything getopts flags as invalid
# falls through to the no-op default arm.
while getopts "hr:q:d:o:t:" OPTION ; do
  case "$OPTION" in
    h)
      usage
      exit 0
      ;;
    r) reference="$OPTARG" ;;
    q) query="$OPTARG" ;;
    d) database="$OPTARG" ;;
    o) outdir="$OPTARG" ;;
    t) tidy="$OPTARG" ;;
    *) : ;;
  esac
done
# Bail out early when the invocation cannot work.
# With no arguments at all, just show the help text.
if (( $# == 0 )) ; then
  usage
  exit 1
fi
# A reference and a query are both mandatory; show the help text followed by
# the specific complaint when either one is absent.
[[ -n "$reference" ]] || { usage ; err "No Reference sequence provided. Exiting." ; exit 1 ; }
[[ -n "$query" ]] || { usage ; err "Query not supplied. Exiting." ; exit 1 ; }
# Fill in defaults for the optional settings.
if [[ -z $database ]]; then
  # Build the name from the reference's file name only. Keeping directory
  # components would produce an invalid path once the name is prefixed with
  # $outdir, and stripping the extension before the directory could cut the
  # path at a dot inside a directory name.
  reference_file="${reference##*/}"
  database="${reference_file%.*}_DB"
  warn "No database name was specified. Using the reference sequence name and appending _DB."
fi
if [[ -z $outdir ]]; then
  outdir=$(pwd)
  warn "No output directory was specified. Defaulting to the current working directory: $outdir"
fi
#####
# Step 1: Make a BLAST database of the reference sequence.
# The script needs blastn (NOT LEGACY BLAST) and makeblastdb on the PATH, so
# check for both before doing any work.
command -v makeblastdb >/dev/null 2>&1 || { err "makeblastdb doesn't appear to be installed. Aborting."; exit 1; }
command -v blastn >/dev/null 2>&1 || { err "BLAST+ doesn't appear to be installed. Aborting."; exit 1; }

# Prefix shared by all database files: the output directory (without a
# trailing slash) followed by the database name.
db_path="${outdir%/}/${database}"

log "Running makeblastdb:"
# The command is logged as ONE quoted string. log only prints its first
# argument, so any word-splitting here (e.g. a path containing a space) would
# truncate the message.
log " -> makeblastdb -in $reference -dbtype 'nucl' -title $database -out $db_path -parse_seqids"
# Tool output is indented from its second line onwards and routed through log.
makeblastdb -in "$reference" -dbtype 'nucl' -title "$database" -out "$db_path" -parse_seqids | \
  sed '2,$s/^/\ /g' | log
log "Database created."
# Step 2: Perform the all-vs-all BLAST using the query sequence and reference database.
# The comparison file is named <reference>_vs_<query>.act using only the file
# names of the two inputs (directory and final extension removed). Leaving
# the directory parts in would point the output at a path that does not exist
# below $outdir whenever an input lives outside the current directory.
ref_stem="${reference##*/}"
ref_stem="${ref_stem%.*}"
query_stem="${query##*/}"
query_stem="${query_stem%.*}"
act_name="${ref_stem}_vs_${query_stem}.act"
act_path="${outdir%/}/${act_name}"

# Logged as a single quoted string so that paths with spaces are not split.
log " -> blastn -db ${outdir%/}/${database} -query $query -outfmt 6 -out $act_path"
blastn -db "${outdir%/}/${database}" -query "$query" -outfmt 6 -out "$act_path"
log "All finished! The comparison file is called: $act_name"
# Step 3: optionally remove the intermediate BLAST database files.
# $tidy is matched case-insensitively: "t", "true" or an empty/unset value
# mean "clean up"; "f" or "false" mean "keep everything"; any other value is
# reported and the files are left alone.
if [[ $tidy =~ ^([Tt]|[Tt][Rr][Uu][Ee])?$ ]] ; then
  # Refuse to continue with an empty directory or database name: the glob
  # below would otherwise be rooted at "/" or match every dotted file.
  tidy_prefix="${outdir:?output directory is not set}"
  tidy_prefix="${tidy_prefix%/}/${database:?database name is not set}"
  warn "Tidying database files from ${outdir}."
  # Match only "<database>.<something>" rather than every file that merely
  # starts with the database name (BLAST database files are expected to take
  # that form - see the makeblastdb documentation). Never remove the input
  # sequences themselves, and skip the unexpanded pattern when nothing matches.
  tidy_files=()
  for tidy_candidate in "$tidy_prefix".* ; do
    if [[ ! -e $tidy_candidate ]] ; then
      continue
    fi
    if [[ $tidy_candidate -ef $reference || $tidy_candidate -ef $query ]] ; then
      continue
    fi
    tidy_files+=("$tidy_candidate")
  done
  # Calling rm with no matches would fail and, under "set -eo pipefail", turn
  # an otherwise successful run into an error.
  if (( ${#tidy_files[@]} > 0 )) ; then
    rm -v -- "${tidy_files[@]}" | sed '2,$s/^/\ /g' | warn
  fi
elif [[ $tidy =~ ^([Ff]|[Ff][Aa][Ll][Ss][Ee])$ ]] ; then
  : # Do nothing if false-y
else
  err 'Unrecognised argument to tidy (should be T(rue), F(alse) or empty). Leaving files unmodified.'
fi