Skip to content

Commit

Permalink
Collect statistics from the all-maskers-combined genome
Browse files Browse the repository at this point in the history
  • Loading branch information
charles-plessy committed Oct 3, 2024
1 parent 3ca8049 commit 9217db1
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 4 deletions.
4 changes: 4 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ process {
time = { check_max( 30.h * task.attempt, 'time' ) }
}

// Per-process settings for MERGEDMASKS_STATS (the GFASTATS alias that is run on
// MERGEDMASKS.out.fasta in the workflow): set the output prefix to "<meta.id>_allmaskers".
// NOTE(review): SOFTMASK_STATS reads `${SAMPLE}_allmaskers.assembly_summary`, so this
// suffix presumably has to stay in sync with that file name; confirm that the gfastats
// module derives its assembly_summary file name from ext.prefix.
withName: 'MERGEDMASKS_STATS' {
ext.prefix = { "${meta.id}_allmaskers" }
}

withName: 'MULTIQC' {
ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
publishDir = [
Expand Down
3 changes: 2 additions & 1 deletion modules/local/multiqc_softmask_statistics.nf
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ process SOFTMASK_STATS {
echo "# id: 'repeat summary'" >> masking_stats_mqc.tsv
echo "# title: 'repeat summary'" >> masking_stats_mqc.tsv
echo "# ylab: ''" >> masking_stats_mqc.tsv
echo "id\tTotal scaffold length\tTotal contig length\ttantan masked bases\twindowmasker masked bases\trmodeler masked bases (REPM)\trmodeler masked bases (DFAM)\trmodeler masked bases (EXTR)" >> masking_stats_mqc.tsv
echo "id\tTotal scaffold length\tTotal contig length\ttantan masked bases\twindowmasker masked bases\trmodeler masked bases (REPM)\trmodeler masked bases (DFAM)\trmodeler masked bases (EXTR)\tAll maskers combined" >> masking_stats_mqc.tsv
# Here we loop on samples
for file in *_tantan.assembly_summary
do
Expand All @@ -47,6 +47,7 @@ process SOFTMASK_STATS {
printf '\tNA' >> masking_stats_mqc.tsv
fi
done
grep 'soft-masked bases' \${SAMPLE}_allmaskers.assembly_summary | awk '{printf "\t"\$4}' >> masking_stats_mqc.tsv
printf '\n' >> masking_stats_mqc.tsv
done
"""
Expand Down
9 changes: 6 additions & 3 deletions workflows/pairgenomealignmask.nf
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ include { GFASTATS as EXTLIB_STATS } from '../modules/
include { SEQTK_CUTN as EXTLIB_BED } from '../modules/local/seqtk.nf'

include { MERGE_MASKS as MERGEDMASKS } from '../modules/local/mergemasks.nf'
include { GFASTATS as MERGEDMASKS_STATS } from '../modules/nf-core/gfastats/main'
include { SOFTMASK_STATS } from '../modules/local/multiqc_softmask_statistics.nf'

include { MULTIQC } from '../modules/nf-core/multiqc/main'
Expand Down Expand Up @@ -115,10 +116,11 @@ workflow PAIRGENOMEALIGNMASK {
//
MERGEDMASKS (
input_genomes
.join(TANTAN_BED.out.bed_gz.map{meta, bed -> [ [id:meta.key ] , bed ] } )
.join(WINDOWMASKER_BED.out.bed_gz.map{meta, bed -> [ [id:meta.key ] , bed ] })
.join(REPEATMODELER_BED.out.bed_gz.map{meta, bed -> [ [id:meta.key ] , bed ] })
.join(TANTAN_BED.out.bed_gz.map {meta, bed -> [ [id:meta.key ] , bed ] } )
.join(WINDOWMASKER_BED.out.bed_gz.map {meta, bed -> [ [id:meta.key ] , bed ] } )
.join(REPEATMODELER_BED.out.bed_gz.map{meta, bed -> [ [id:meta.key ] , bed ] } )
)
MERGEDMASKS_STATS ( MERGEDMASKS.out.fasta )

// Aggregation of statistics
//
Expand All @@ -128,6 +130,7 @@ workflow PAIRGENOMEALIGNMASK {
.mix( REPEATMODELER_STATS.out.assembly_summary.map {it[1]} )
.mix( DFAM_STATS_maybeout .map {it[1]} )
.mix( EXTLIB_STATS_maybeout .map {it[1]} )
.mix( MERGEDMASKS_STATS.out.assembly_summary.map {it[1]} )
.collect()
)
ch_multiqc_files = ch_multiqc_files.mix(SOFTMASK_STATS.out.tsv)
Expand Down

0 comments on commit 9217db1

Please sign in to comment.