-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3 from fmalmeida/dev
added minimap and options to configure plot
- Loading branch information
Showing
10 changed files
with
118 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,22 @@ | ||
# find_links: build a nucleotide BLAST database from the concatenated filtered
# genomes, run an all-vs-all blastn against it and keep the hits whose percent
# identity is at least MINID.
# Reads:   RESULTS, MINID, THREADS
# Exports: CONCAT_FASTA, BLAST_DB
# NOTE(review): this span appears to be a rendered diff (hunk "-1,22 +1,22"),
# so every changed statement shows up twice: first with the all_vs_all_blast
# path, then with the all_vs_all_links path -- presumably old line followed by
# new line; confirm against the repository before running it as-is.
find_links() | ||
{ | ||
# create dir | ||
mkdir -p ${RESULTS}/all_vs_all_blast | ||
mkdir -p ${RESULTS}/all_vs_all_links | ||
|
||
# concatenate genomes | ||
# NOTE(review): ">>" appends, so an already existing concatenated_genomes.fasta
# is extended rather than replaced -- confirm RESULTS is always a fresh directory.
cat ${RESULTS}/filtered/* >> ${RESULTS}/concatenated_genomes.fasta ; | ||
export CONCAT_FASTA=${RESULTS}/concatenated_genomes.fasta | ||
export BLAST_DB=${RESULTS}/all_vs_all_blast/blast_db | ||
export BLAST_DB=${RESULTS}/all_vs_all_links/blast_db | ||
|
||
# Run blast | ||
# The concatenated FASTA is both the database and the query (all-vs-all).
# makeblastdb output (stdout and stderr) is discarded.
makeblastdb -in $CONCAT_FASTA -dbtype nucl -out $BLAST_DB &> /dev/null ; | ||
blastn -task blastn -perc_identity $MINID -query $CONCAT_FASTA -db $BLAST_DB \ | ||
-outfmt "6 qseqid qstart qend sseqid sstart send pident length mismatch gapopen evalue bitscore stitle" \ | ||
-out ${RESULTS}/all_vs_all_blast/tmp.blast -num_threads $THREADS | ||
-out ${RESULTS}/all_vs_all_links/tmp.blast -num_threads $THREADS | ||
|
||
# Filter blast | ||
# pident is the 7th field of the -outfmt list above, so "$7 >= minid" keeps
# the hits at or above MINID.
awk -F '\t' -v minid=$MINID '{ if ($7 >= minid) { print } }' ${RESULTS}/all_vs_all_blast/tmp.blast > ${RESULTS}/all_vs_all_blast/all_vs_all.blast | ||
awk -F '\t' -v minid=$MINID '{ if ($7 >= minid) { print } }' ${RESULTS}/all_vs_all_links/tmp.blast > ${RESULTS}/all_vs_all_links/all_vs_all.aln.txt | ||
|
||
# Remove tmp | ||
rm ${RESULTS}/all_vs_all_blast/tmp.blast | ||
rm ${RESULTS}/all_vs_all_links/tmp.blast | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,13 +33,20 @@ Copyright, Felipe Almeida <[email protected]>, 2021 | |
# Input min. length | ||
--minlen Min size of contigs to consider for plot [Default: 10000] | ||
# Links (blastn) min. percentage id | ||
# Links configurations | ||
--skip_links Do not compute blast and do not draw links. | ||
Useful for when only desiring the configs. [Default: false] | ||
--use_minimap2 Compute links with minimap2. This is only useful for big genomes. | ||
With small genomes probably no align block will be detected. | ||
For minimap2, --minid and --linklen have no effect. By default uses blastn. | ||
--minimap2_method Select alignment "method" / "algorithm" for minimap2. | ||
Options: asm5 | asm10 | asm20. [Default: asm20] | ||
--minid Min. percentage id to filter the results of blastn to draw links [Default: 85] | ||
--minid Min. percentage id to filter the results of blastn (only for blastn) to draw links [Default: 85] | ||
--linklen Min. link (blastn hit) length to display in plot [Default: 5000] | ||
--linklen Min. length of blastn hits (only for blastn) to display in plot [Default: 5000] | ||
--show_intrachr Tells the program to create a conf file showing intra chr links [Default: false] | ||
Mandatory if using only one FASTA, otherwise, links will not be shown. | ||
|
@@ -60,6 +67,13 @@ Copyright, Felipe Almeida <[email protected]>, 2021 | |
3 or 4 columns as shown at http://circos.ca/documentation/tutorials/configuration/data_files. | ||
The first column must be the name (ID) of the contig. | ||
Checkout the "--gff2tiles" script (below). | ||
# Housekeeping conf | ||
--max_ticks Max number of ticks allowed to plot. [Default: 5000] | ||
--max_ideograms Max number of ideograms allowed to plot. [Default: 200] | ||
--max_links Max number of links allowed to plot. [Default: 50000] | ||
--max_points_per_track Max number of points per track (e.g. histogram) allowed to plot. [Default: 50000] | ||
# Helpful scripts! | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#######################################
# minimap_links: compute all-vs-all links with minimap2 and write them as
# colored link files under ${RESULTS}/conf.
# Globals:
#   RESULTS      (read)    output directory; ${RESULTS}/filtered/* are the inputs
#   THREADS      (read)    number of threads handed to minimap2
#   FOFN         (read)    comma-separated file: fasta,prefix,color (one per line)
#   CONCAT_FASTA (written) exported path of the concatenated genomes
# Outputs (files):
#   ${RESULTS}/all_vs_all_links/all_vs_all.aln.txt            raw PAF alignments
#   ${RESULTS}/conf/links_concatenated.txt                    coordinates only
#   ${RESULTS}/conf/links_concatenated_colored.txt            + "color=<color>"
#   ${RESULTS}/conf/links_concatenated_colored_no_intrachr.txt
# Returns: non-zero if the scratch file cannot be created, otherwise the
#          status of the final awk.
#######################################
minimap_links()
{
  local links_dir="${RESULTS}/all_vs_all_links"
  local conf_dir="${RESULTS}/conf"
  local paf="${links_dir}/all_vs_all.aln.txt"
  local links="${conf_dir}/links_concatenated.txt"
  local colored="${conf_dir}/links_concatenated_colored.txt"
  local names_tmp
  local FASTA FASTA_PREFIX FASTA_COLOR

  # create dirs
  mkdir -p "$links_dir" "$conf_dir"

  # concatenate genomes
  # Truncate instead of append: re-running on the same RESULTS directory must
  # not duplicate every sequence in the concatenated FASTA.
  export CONCAT_FASTA="${RESULTS}/concatenated_genomes.fasta"
  cat "${RESULTS}"/filtered/* > "$CONCAT_FASTA"

  # run minimap2 (concatenated genomes against themselves)
  # NOTE(review): the preset is hard-coded to asm20 although the help text
  # advertises --minimap2_method; confirm whether it should be wired in here.
  # stderr is discarded on purpose (minimap2 logs progress there).
  minimap2 \
    -c --cs -t "$THREADS" \
    -x asm20 -o "$paf" \
    "$CONCAT_FASTA" "$CONCAT_FASTA" 2> /dev/null

  # parse paf: keep query name/start/end and target name/start/end
  cut -f 1,3,4,6,8,9 "$paf" > "$links"

  # get links coming from each FASTA's contigs and give them that FASTA's color
  names_tmp=$(mktemp) || return 1
  : > "$colored"
  # IFS is scoped to `read` so the comma separator does not leak into the rest
  # of the script; the `|| [[ -n ... ]]` keeps a last line that lacks a newline.
  while IFS=',' read -r FASTA FASTA_PREFIX FASTA_COLOR || [[ -n "$FASTA" ]]; do
    [[ -n "$FASTA" ]] || continue
    # print (not printf) so a '%' in a sequence name is not read as a format
    bioawk -c fastx '{ print $name }' "$FASTA" > "$names_tmp"
    awk -v color1="$FASTA_COLOR" -F'\t' \
      'NR==FNR{c[$1]++;next};c[$1] > 0 {print $0 "\t" "color="color1}' \
      "$names_tmp" "$links" >> "$colored"
  done < "$FOFN"
  rm -f -- "$names_tmp"

  # create additional file without intrachr links (query name != target name)
  awk \
    -F'\t' \
    '{ if ($1 != $4) { print } }' \
    "$colored" > "${conf_dir}/links_concatenated_colored_no_intrachr.txt"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters