Skip to content

Commit

Permalink
Report on softmask overlaps, and rename softmask stats local module.
Browse files Browse the repository at this point in the history
  • Loading branch information
charles-plessy committed Oct 3, 2024
1 parent 61f5adc commit 2778d59
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 6 deletions.
48 changes: 48 additions & 0 deletions modules/local/multiqc_softmask_overlaps.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Summarise the pairwise overlaps (Jaccard indexes) between the soft masks
// produced by tantan, WindowMasker and RepeatMasker into one MultiQC
// custom-content table.
//
// Input:  the staged `<sample>_tantan_windowmasker_jaccard.txt`,
//         `<sample>_tantan_repeatmasker_jaccard.txt` and
//         `<sample>_repeatmasker_windowmasker_jaccard.txt` files of every sample.
// Output: tsv      - `jaccard_indexes_mqc.tsv`, one row per sample.
//         versions - `versions.yml` recording the bedtools version of the image.
process MULTIQC_SOFTMASK_OVERLAPS {
    label 'process_single'

    // Recycling bedtools image as we already use it elsewhere
    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' :
        'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }"

    input:
    path (files)

    output:
    path "*_mqc.tsv"   , emit: tsv
    // The script writes versions.yml, so it has to be declared to be collected.
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    """
    # Summarise Jaccard indexes for MultiQC.
    # The keys below "pconfig:" are indented so that they are parsed as
    # children of pconfig and not as duplicated top-level keys.
    {
        echo "# id: 'Jaccard indexes'"
        echo "# section_name: 'Repeat masking overlap statistics'"
        echo "# format: 'tsv'"
        echo "# plot_type: 'table'"
        echo "# description: 'This plot shows a brief summary of the overlaps between the soft masks generated by each process'"
        echo "# pconfig:"
        echo "#     id: 'Jaccard indexes'"
        echo "#     title: 'Jaccard indexes'"
        echo "#     ylab: ''"
        echo "id\ttantan–WindowMasker\ttantan–RepeatMasker\tWindowMasker–RepeatMasker"
    } > jaccard_indexes_mqc.tsv

    # Here we loop on samples, taking the tantan/WindowMasker files as the
    # reference list of sample identifiers.
    for JACCARD_FILE in *_tantan_windowmasker_jaccard.txt
    do
        # When nothing matches, the glob is left unexpanded: skip it.
        [ -e "\${JACCARD_FILE}" ] || continue
        SAMPLE=\$(basename "\${JACCARD_FILE}" _tantan_windowmasker_jaccard.txt)

        # Explicit "%s" formats so that a '%' in the data is never read as a
        # printf directive.  The value reported is the third column of the
        # second line of each file.
        printf "%s\t" "\${SAMPLE}"
        sed -n 2p "\${SAMPLE}_tantan_windowmasker_jaccard.txt"       | awk '{printf "%s\t", \$3}'
        sed -n 2p "\${SAMPLE}_tantan_repeatmasker_jaccard.txt"       | awk '{printf "%s\t", \$3}'
        sed -n 2p "\${SAMPLE}_repeatmasker_windowmasker_jaccard.txt" | awk '{printf "%s", \$3}'
        echo
    done >> jaccard_indexes_mqc.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bedtools: \$(bedtools --version | sed -e "s/bedtools v//g")
    END_VERSIONS
    """
}
6 changes: 3 additions & 3 deletions modules/local/multiqc_softmask_statistics.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process SOFTMASK_STATS {
process MULTIQC_SOFTMASK_STATS {
label 'process_single'
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
Expand All @@ -19,8 +19,8 @@ process SOFTMASK_STATS {
def args = task.ext.args ?: ''
"""
# Here we make the header
echo "# id: 'repeat summary'" > masking_stats_mqc.tsv
echo "# section_name: 'repeat masking summary statistics'" >> masking_stats_mqc.tsv
echo "# id: 'Repeat summary'" > masking_stats_mqc.tsv
echo "# section_name: 'Repeat masking summary statistics'" >> masking_stats_mqc.tsv
echo "# format: 'tsv'" >> masking_stats_mqc.tsv
echo "# plot_type: 'table'" >> masking_stats_mqc.tsv
echo "# description: 'This plot shows a brief summary of each genomes whose repeats has been masked'" >> masking_stats_mqc.tsv
Expand Down
12 changes: 9 additions & 3 deletions workflows/pairgenomealignmask.nf
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ include { SEQTK_CUTN as EXTLIB_BED } from '../modules/

include { MERGE_MASKS as MERGEDMASKS } from '../modules/local/mergemasks.nf'
include { GFASTATS as MERGEDMASKS_STATS } from '../modules/nf-core/gfastats/main'
include { SOFTMASK_STATS } from '../modules/local/multiqc_softmask_statistics.nf'
include { MULTIQC_SOFTMASK_STATS } from '../modules/local/multiqc_softmask_statistics.nf'
include { MULTIQC_SOFTMASK_OVERLAPS } from '../modules/local/multiqc_softmask_overlaps.nf'

include { MULTIQC } from '../modules/nf-core/multiqc/main'
include { paramsSummaryMap } from 'plugin/nf-validation'
Expand Down Expand Up @@ -124,7 +125,7 @@ workflow PAIRGENOMEALIGNMASK {

// Aggregation of statistics
//
SOFTMASK_STATS ( channel.empty()
MULTIQC_SOFTMASK_STATS ( channel.empty()
.mix( TANTAN_STATS.out.assembly_summary.map {it[1]} )
.mix( WINDOWMASKER_STATS.out.assembly_summary.map {it[1]} )
.mix( REPEATMODELER_STATS.out.assembly_summary.map {it[1]} )
Expand All @@ -133,7 +134,12 @@ workflow PAIRGENOMEALIGNMASK {
.mix( MERGEDMASKS_STATS.out.assembly_summary.map {it[1]} )
.collect()
)
ch_multiqc_files = ch_multiqc_files.mix(SOFTMASK_STATS.out.tsv)
ch_multiqc_files = ch_multiqc_files.mix(MULTIQC_SOFTMASK_STATS.out.tsv)

// Aggregation of statistics (Jaccard indices)
//
MULTIQC_SOFTMASK_OVERLAPS ( MERGEDMASKS.out.txt.map{it[1]}.collect() )
ch_multiqc_files = ch_multiqc_files.mix(MULTIQC_SOFTMASK_OVERLAPS.out.tsv)


// Collect software versions
Expand Down

0 comments on commit 2778d59

Please sign in to comment.