Skip to content

Commit

Permalink
Merge pull request #3 from fmalmeida/dev
Browse files Browse the repository at this point in the history
added minimap and options to configure plot
  • Loading branch information
fmalmeida authored Dec 20, 2022
2 parents e1c52c2 + c8a4ef9 commit bcd581f
Show file tree
Hide file tree
Showing 10 changed files with 118 additions and 19 deletions.
37 changes: 36 additions & 1 deletion recipe/bin/plot_circos
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
# Simple script to create a circos plot between two FASTA files.

######################
Expand Down Expand Up @@ -34,6 +34,12 @@ export GCWINDOW=5000
export GCSTEP=5000
export BACANNOT="no"
export SKIP_LINKS="no"
export USE_MINIMAP2="no"
export MINIMAP2_METHOD="asm20"
export MAX_TICKS="5000"
export MAX_IDEOGRAMS="200"
export MAX_LINKS="50000"
export MAX_POINTS_PER_TRACK="50000"

######################################
### Function to filter FASTA files ###
Expand All @@ -55,6 +61,11 @@ source ${SCRIPT_DIR}/../src/find_links.sh
#########################################################################
source ${SCRIPT_DIR}/../src/parse_links.sh

########################################
### Function to use minimap2 instead ###
########################################
source ${SCRIPT_DIR}/../src/minimap2.sh

#############################################################################
### Function to sort and remove duplicates from links and karyotype files ###
#############################################################################
Expand Down Expand Up @@ -234,6 +245,30 @@ case $ARGS in
export SKIP_LINKS="yes"
shift
;;
--use_minimap2)
export USE_MINIMAP2="yes"
shift
;;
--minimap2_method)
export MINIMAP2_METHOD=$2
shift 2
;;
--max_ticks)
export MAX_TICKS=$2
shift 2
;;
--max_ideograms)
export MAX_IDEOGRAMS=$2
shift 2
;;
--max_links)
export MAX_LINKS=$2
shift 2
;;
--max_points_per_track)
export MAX_POINTS_PER_TRACK=$2
shift 2
;;
*)
printf "******************************\n"
printf "Error: Invalid argument $1\n"
Expand Down
4 changes: 3 additions & 1 deletion recipe/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{% set name = "easy_circos" %}
{% set version = "0.3" %}
{% set version = "0.4" %}

package:
name: "{{ name|lower }}"
Expand All @@ -22,6 +22,7 @@ requirements:
- perl-app-cpanminus
- perl-module-build
- blast
- minimap2
run:
- perl
- circos
Expand All @@ -32,6 +33,7 @@ requirements:
- perl-app-cpanminus
- perl-module-build
- blast
- minimap2

about:
home: "https://github.com/fmalmeida/easy_circos_plot"
Expand Down
1 change: 0 additions & 1 deletion recipe/src/filter.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,4 @@ while read -r FASTA FASTA_PREFIX FASTA_COLOR ; do
$CONDA_PREFIX/bin/perl $CONDA_PREFIX/bin/removesmalls.pl $MINLEN $FASTA >> ${RESULTS}/filtered/"$name" ;
continue
done<"$FOFN"

}
10 changes: 5 additions & 5 deletions recipe/src/find_links.sh
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
find_links()
{
# create dir
mkdir -p ${RESULTS}/all_vs_all_blast
mkdir -p ${RESULTS}/all_vs_all_links

# concatenate genomes
cat ${RESULTS}/filtered/* >> ${RESULTS}/concatenated_genomes.fasta ;
export CONCAT_FASTA=${RESULTS}/concatenated_genomes.fasta
export BLAST_DB=${RESULTS}/all_vs_all_blast/blast_db
export BLAST_DB=${RESULTS}/all_vs_all_links/blast_db

# Run blast
makeblastdb -in $CONCAT_FASTA -dbtype nucl -out $BLAST_DB &> /dev/null ;
blastn -task blastn -perc_identity $MINID -query $CONCAT_FASTA -db $BLAST_DB \
-outfmt "6 qseqid qstart qend sseqid sstart send pident length mismatch gapopen evalue bitscore stitle" \
-out ${RESULTS}/all_vs_all_blast/tmp.blast -num_threads $THREADS
-out ${RESULTS}/all_vs_all_links/tmp.blast -num_threads $THREADS

# Filter blast
awk -F '\t' -v minid=$MINID '{ if ($7 >= minid) { print } }' ${RESULTS}/all_vs_all_blast/tmp.blast > ${RESULTS}/all_vs_all_blast/all_vs_all.blast
awk -F '\t' -v minid=$MINID '{ if ($7 >= minid) { print } }' ${RESULTS}/all_vs_all_links/tmp.blast > ${RESULTS}/all_vs_all_links/all_vs_all.aln.txt

# Remove tmp
rm ${RESULTS}/all_vs_all_blast/tmp.blast
rm ${RESULTS}/all_vs_all_links/tmp.blast
}
20 changes: 17 additions & 3 deletions recipe/src/help.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,20 @@ Copyright, Felipe Almeida <[email protected]>, 2021
# Input min. length
--minlen Min size of contigs to consider for plot [Default: 10000]
# Links (blastn) min. percentage id
# Links configurations
--skip_links Do not compute blast and do not draw links.
Useful for when only desiring the configs. [Default: false]
--use_minimap2 Compute links with minimap2. This is only useful for big genomes.
With small genomes probably no align block will be detected.
For minimap2, --minid and --linklen have no effect. By default uses blastn.
--minimap2_method Select alignment "method" / "algorithm" for minimap2.
Options: asm5 | asm10 | asm20. [Default: asm20]
--minid Min. percentage id to filter the results of blastn to draw links [Default: 85]
--minid Min. percentage id to filter the results of blastn (only for blastn) to draw links [Default: 85]
--linklen Min. link (blastn hit) length to display in plot [Default: 5000]
--linklen Min. length of blastn hits (only for blastn) length to display in plot [Default: 5000]
--show_intrachr Tells the program to create a conf file showing intra chr links [Default: false]
Mandatory if using only one FASTA, otherwise, links will not be shown.
Expand All @@ -60,6 +67,13 @@ Copyright, Felipe Almeida <[email protected]>, 2021
3 or 4 columns as shown at http://circos.ca/documentation/tutorials/configuration/data_files.
The first column must be the name (ID) of the contig.
Checkout the "--gff2tiles" script (below).
# Housekeeping conf
--max_ticks Max number of ticks allowed to plot. [Default: 5000]
--max_ideograms Max number of ideograms allowed to plot. [Default: 200]
--max_links Max number of links allowed to plot. [Default: 50000]
--max_points_per_track Max number of points per track (e.g. histogram) allowed to plot. [Default: 50000]
# Helpful scripts!
Expand Down
33 changes: 33 additions & 0 deletions recipe/src/minimap2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
minimap_links()
{
# create dir
mkdir -p ${RESULTS}/all_vs_all_links ${RESULTS}/conf

# concatenate genomes
cat ${RESULTS}/filtered/* >> ${RESULTS}/concatenated_genomes.fasta ;
export CONCAT_FASTA=${RESULTS}/concatenated_genomes.fasta

# run minimap2
minimap2 \
-c --cs -t $THREADS \
-x asm20 -o ${RESULTS}/all_vs_all_links/all_vs_all.aln.txt \
$CONCAT_FASTA $CONCAT_FASTA 2> /dev/null

# parse paf
cut -f 1,3,4,6,8,9 ${RESULTS}/all_vs_all_links/all_vs_all.aln.txt >> ${RESULTS}/conf/links_concatenated.txt

# get links comming from contigs and give it colors
IFS=','
while read -r FASTA FASTA_PREFIX FASTA_COLOR ; do
bioawk -c fastx '{ printf $name"\n" }' $FASTA > tmp_names.fasta ;
awk -v color1=$FASTA_COLOR -F'\t' 'NR==FNR{c[$1]++;next};c[$1] > 0 {print $0 "\t" "color="color1}' \
tmp_names.fasta ${RESULTS}/conf/links_concatenated.txt >> ${RESULTS}/conf/links_concatenated_colored.txt
rm tmp_names.fasta ;
done<"$FOFN"

# create additional file whithout intrachr links
awk \
-F'\t' \
'{ if ($1 != $4) { print } }' \
${RESULTS}/conf/links_concatenated_colored.txt > ${RESULTS}/conf/links_concatenated_colored_no_intrachr.txt ;
}
4 changes: 2 additions & 2 deletions recipe/src/parse_links.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ mkdir -p ${RESULTS}/conf

# Filter blocks with more then N bp hits
awk -v minlen=$MINLINKLEN '{ if ($8 >= minlen) { print } }' \
${RESULTS}/all_vs_all_blast/all_vs_all.blast | cut -f 1,2,3,4,5,6 >> ${RESULTS}/conf/links_concatenated.txt ;
${RESULTS}/all_vs_all_links/all_vs_all.aln.txt | cut -f 1,2,3,4,5,6 >> ${RESULTS}/conf/links_concatenated.txt ;

# get links comming from contigs and give it colors
IFS=','
Expand All @@ -25,7 +25,7 @@ empty_links()
# concatenate genomes
cat ${RESULTS}/filtered/* >> ${RESULTS}/concatenated_genomes.fasta ;
export CONCAT_FASTA=${RESULTS}/concatenated_genomes.fasta
export BLAST_DB=${RESULTS}/all_vs_all_blast/blast_db
export BLAST_DB=${RESULTS}/all_vs_all_links/blast_db

touch ${RESULTS}/conf/links_concatenated_colored.txt ${RESULTS}/conf/links_concatenated_colored_no_intrachr.txt
}
8 changes: 8 additions & 0 deletions recipe/src/plot_circos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ CURRENT_DIR=$PWD
# got to conf dir
cd ${RESULTS}/conf/

# get etc/files
cp -r $(which circos | sed 's|/circos$|/../etc|g') .

sed -i "s/max_ticks.*= 5000/max_ticks = ${MAX_TICKS}/" etc/housekeeping.conf
sed -i "s/max_ideograms.*= 200/max_ideograms = ${MAX_IDEOGRAMS}/" etc/housekeeping.conf
sed -i "s/max_links.*= 25000/max_links = ${MAX_LINKS}/" etc/housekeeping.conf
sed -i "s/max_points_per_track.*= 25000/max_points_per_track = ${MAX_POINTS_PER_TRACK}/" etc/housekeeping.conf

# draw
circos

Expand Down
14 changes: 11 additions & 3 deletions recipe/src/workflow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,17 @@ workflow()
# Step 3
if [ "$SKIP_LINKS" = "no" ]
then
echo " # Finding links (all vs all blastn)!"
find_links ;
parse_links ;

if [ "$USE_MINIMAP2" = "yes" ]
then
echo " # Finding links (all vs all minimap2)!"
minimap_links ;
else
echo " # Finding links (all vs all blastn)!"
find_links ;
parse_links ;
fi

else
echo " # Skipping links (all vs all blastn)!"
empty_links ;
Expand Down
6 changes: 3 additions & 3 deletions recipe/src/write_circos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ write_circos()
cat << EOF
# MINIMUM CIRCOS CONFIGURATION
# debugging, I/O an dother system parameters
<<include etc/housekeeping.conf>> # included from Circos distribution
# Defines unit length for ideogram and tick spacing, referenced
# using "u" prefix, e.g. 10u
chromosomes_units = 500000
Expand Down Expand Up @@ -94,9 +97,6 @@ ${TILES_CONF}
</plots>
# debugging, I/O an dother system parameters
<<include etc/housekeeping.conf>> # included from Circos distribution
show_ticks = yes
show_tick_labels = yes
Expand Down

0 comments on commit bcd581f

Please sign in to comment.