From 2778d5905e5a359d2231b73bda71fb31481465d4 Mon Sep 17 00:00:00 2001
From: Charles Plessy
Date: Thu, 3 Oct 2024 15:18:41 +0900
Subject: [PATCH] Report on softmask overlaps, and rename softmask stats local module.

---
Reviewer note (text in this position is ignored by git am): compared with the
commit as recorded, the new module now declares versions.yml as an output,
prints sample names through a fixed printf format, and carries a few extra
comments. The size of the new-file hunk and the diffstat are updated to
match; the abbreviated blob id on its index line is left as recorded.

 modules/local/multiqc_softmask_overlaps.nf   | 52 ++++++++++++++++++++
 modules/local/multiqc_softmask_statistics.nf |  6 +--
 workflows/pairgenomealignmask.nf             | 12 +++--
 3 files changed, 64 insertions(+), 6 deletions(-)
 create mode 100644 modules/local/multiqc_softmask_overlaps.nf

diff --git a/modules/local/multiqc_softmask_overlaps.nf b/modules/local/multiqc_softmask_overlaps.nf
new file mode 100644
index 0000000..003650f
--- /dev/null
+++ b/modules/local/multiqc_softmask_overlaps.nf
@@ -0,0 +1,52 @@
+// Collates the per-sample *_jaccard.txt reports into one MultiQC custom-content table.
+process MULTIQC_SOFTMASK_OVERLAPS {
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' :
+        'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }"
+
+    // Recycling bedtools image as we already use it elsewhere
+
+    input:
+    path (files)
+
+    output:
+    path "*_mqc.tsv", emit: tsv
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    """
+    # Summarise Jaccard indexes for MultiQC
+    echo "# id: 'Jaccard indexes'" > jaccard_indexes_mqc.tsv
+    echo "# section_name: 'Repeat masking overlap statistics'" >> jaccard_indexes_mqc.tsv
+    echo "# format: 'tsv'" >> jaccard_indexes_mqc.tsv
+    echo "# plot_type: 'table'" >> jaccard_indexes_mqc.tsv
+    echo "# description: 'This plot shows a brief summary of the overlaps between the soft masks generated by each process'" >> jaccard_indexes_mqc.tsv
+    echo "# pconfig:" >> jaccard_indexes_mqc.tsv
+    echo "#     id: 'Jaccard indexes'" >> jaccard_indexes_mqc.tsv
+    echo "#     title: 'Jaccard indexes'" >> jaccard_indexes_mqc.tsv
+    echo "#     ylab: ''" >> jaccard_indexes_mqc.tsv
+    echo "id\ttantan–WindowMasker\ttantan–RepeatMasker\tWindowMasker–RepeatMasker" >> jaccard_indexes_mqc.tsv
+    # Here we loop on samples
+    for SAMPLE in \$(basename -s _tantan_windowmasker_jaccard.txt *_tantan_windowmasker_jaccard.txt)
+    do
+        # The sample name is passed as an argument, not as the printf format, so a '%' in it is printed as-is
+        printf '%s\t' "\${SAMPLE}" >> jaccard_indexes_mqc.tsv
+        # Keep the third field of line 2 of each report (presumably the jaccard column of bedtools jaccard output; confirm against MERGEDMASKS)
+        sed -n 2p \${SAMPLE}_tantan_windowmasker_jaccard.txt | awk '{printf \$3"\t"}' >> jaccard_indexes_mqc.tsv
+        sed -n 2p \${SAMPLE}_tantan_repeatmasker_jaccard.txt | awk '{printf \$3"\t"}' >> jaccard_indexes_mqc.tsv
+        sed -n 2p \${SAMPLE}_repeatmasker_windowmasker_jaccard.txt | awk '{printf \$3 }' >> jaccard_indexes_mqc.tsv
+        printf '\n' >> jaccard_indexes_mqc.tsv
+    done
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bedtools: \$(bedtools --version | sed -e "s/bedtools v//g")
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/multiqc_softmask_statistics.nf b/modules/local/multiqc_softmask_statistics.nf
index cc65c44..b3f2a5b 100644
--- a/modules/local/multiqc_softmask_statistics.nf
+++ b/modules/local/multiqc_softmask_statistics.nf
@@ -1,4 +1,4 @@
-process SOFTMASK_STATS {
+process MULTIQC_SOFTMASK_STATS {
     label 'process_single'
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
@@ -19,8 +19,8 @@ process SOFTMASK_STATS { def args = task.ext.args ?: '' """ # Here we make the header - echo "# id: 'repeat summary'" > masking_stats_mqc.tsv - echo "# section_name: 'repeat masking summary statistics'" >> masking_stats_mqc.tsv + echo "# id: 'Repeat summary'" > masking_stats_mqc.tsv + echo "# section_name: 'Repeat masking summary statistics'" >> masking_stats_mqc.tsv echo "# format: 'tsv'" >> masking_stats_mqc.tsv echo "# plot_type: 'table'" >> masking_stats_mqc.tsv echo "# description: 'This plot shows a brief summary of each genomes whose repeats has been masked'" >> masking_stats_mqc.tsv diff --git a/workflows/pairgenomealignmask.nf b/workflows/pairgenomealignmask.nf index 47a7c87..c3f8171 100644 --- a/workflows/pairgenomealignmask.nf +++ b/workflows/pairgenomealignmask.nf @@ -30,7 +30,8 @@ include { SEQTK_CUTN as EXTLIB_BED } from '../modules/ include { MERGE_MASKS as MERGEDMASKS } from '../modules/local/mergemasks.nf' include { GFASTATS as MERGEDMASKS_STATS } from '../modules/nf-core/gfastats/main' -include { SOFTMASK_STATS } from '../modules/local/multiqc_softmask_statistics.nf' +include { MULTIQC_SOFTMASK_STATS } from '../modules/local/multiqc_softmask_statistics.nf' +include { MULTIQC_SOFTMASK_OVERLAPS } from '../modules/local/multiqc_softmask_overlaps.nf' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { paramsSummaryMap } from 'plugin/nf-validation' @@ -124,7 +125,7 @@ workflow PAIRGENOMEALIGNMASK { // Aggregation of statistics // - SOFTMASK_STATS ( channel.empty() + MULTIQC_SOFTMASK_STATS ( channel.empty() .mix( TANTAN_STATS.out.assembly_summary.map {it[1]} ) .mix( WINDOWMASKER_STATS.out.assembly_summary.map {it[1]} ) .mix( REPEATMODELER_STATS.out.assembly_summary.map {it[1]} ) @@ -133,7 +134,12 @@ workflow PAIRGENOMEALIGNMASK { .mix( MERGEDMASKS_STATS.out.assembly_summary.map {it[1]} ) .collect() ) - ch_multiqc_files = ch_multiqc_files.mix(SOFTMASK_STATS.out.tsv) + ch_multiqc_files = 
ch_multiqc_files.mix(MULTIQC_SOFTMASK_STATS.out.tsv) + + // Aggregation of statistics (Jaccard indices) + // + MULTIQC_SOFTMASK_OVERLAPS ( MERGEDMASKS.out.txt.map{it[1]}.collect() ) + ch_multiqc_files = ch_multiqc_files.mix(MULTIQC_SOFTMASK_OVERLAPS.out.tsv) // Collect software versions