diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dbc6e02..567ac13 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,3 +41,7 @@ jobs: # Remember that you can parallelise this by using strategy.matrix run: | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + + - name: Run pipeline with unaligned test data + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_align,docker --outdir ./results --align diff --git a/.github/workflows/sanger_test.yml b/.github/workflows/sanger_test.yml index 28f7625..a4e5426 100644 --- a/.github/workflows/sanger_test.yml +++ b/.github/workflows/sanger_test.yml @@ -19,8 +19,9 @@ jobs: parameters: | { "outdir": "${{ secrets.TOWER_WORKDIR_PARENT }}/results/${{ github.repository }}/results-${{ github.sha }}", + "align": true } - profiles: test,sanger,singularity,cleanup + profiles: test_align,sanger,singularity,cleanup - uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/sanger_test_full.yml b/.github/workflows/sanger_test_full.yml index e028c6b..a552c63 100644 --- a/.github/workflows/sanger_test_full.yml +++ b/.github/workflows/sanger_test_full.yml @@ -32,8 +32,9 @@ jobs: parameters: | { "outdir": "${{ secrets.TOWER_WORKDIR_PARENT }}/results/${{ github.repository }}/results-${{ env.REVISION }}", + "align": true } - profiles: test_full,sanger,singularity,cleanup + profiles: test_full_align,sanger,singularity,cleanup - uses: actions/upload-artifact@v3 with: diff --git a/assets/samplesheet_test_align.csv b/assets/samplesheet_test_align.csv new file mode 100644 index 0000000..4b5a9b2 --- /dev/null +++ b/assets/samplesheet_test_align.csv @@ -0,0 +1,4 @@ +sample,datatype,datafile +icCanRufa1,pacbio,https://tolit.cog.sanger.ac.uk/test-data/Cantharis_rufa/genomic_data/icCanRufa1/pacbio/m64094_200730_174533.ccs.bc1010_BAK8A_OA--bc1010_BAK8A_OA_0_03.bam 
+icCanRufa1,pacbio,https://tolit.cog.sanger.ac.uk/test-data/Cantharis_rufa/genomic_data/icCanRufa1/pacbio/m64094_200730_174533.ccs.bc1010_BAK8A_OA--bc1010_BAK8A_OA_0_02.bam +icCanRufa1XXXXX,pacbio,https://tolit.cog.sanger.ac.uk/test-data/Cantharis_rufa/genomic_data/icCanRufa1/pacbio/m64094_200730_174533.ccs.bc1010_BAK8A_OA--bc1010_BAK8A_OA_0_02.bam diff --git a/assets/samplesheet_test_full_align.csv b/assets/samplesheet_test_full_align.csv new file mode 100644 index 0000000..d5b0ac4 --- /dev/null +++ b/assets/samplesheet_test_full_align.csv @@ -0,0 +1,2 @@ +sample,datatype,datafile +ilPolIcar1,pacbio,/lustre/scratch124/tol/projects/darwin/data/insects/Polyommatus_icarus/genomic_data/ilPolIcar1/pacbio/m64016_191206_183623.ccs.bc1019_BAK8B_OA--bc1019_BAK8B_OA.bam diff --git a/assets/vectorDB.tar.gz b/assets/vectorDB.tar.gz new file mode 100644 index 0000000..f9b08d5 Binary files /dev/null and b/assets/vectorDB.tar.gz differ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index d088e65..3a6b9d7 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -108,6 +108,11 @@ def validate_unique_samples(self): """ if len(self._seen) != len(self.validated): raise AssertionError("The combination of sample name and data file must be unique.") + seen = Counter() + for row in self.validated: + sample = row[self._sample_col] + seen[sample] += 1 + row[self._sample_col] = f"{sample}_T{seen[sample]}" def read_head(handle, num_lines=10): diff --git a/bin/pacbio_filter.sh b/bin/pacbio_filter.sh new file mode 100755 index 0000000..73d7caa --- /dev/null +++ b/bin/pacbio_filter.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +input=$1 +output=$2 + +grep -v 'MG551957' $input | awk -v OFS='\t' '{if (($2 ~ /NGB00972/ && $3 >= 97 && $4 >= 44) || ($2 ~ /NGB00973/ && $3 >= 97 && $4 >= 34) || ($2 ~ /^bc/ && $3 >= 99 && $4 >= 16)) print $1}' | sort -u > $output diff --git a/conf/base.config b/conf/base.config index c4ea647..e08d741 100644 --- a/conf/base.config +++ 
b/conf/base.config @@ -16,7 +16,7 @@ process { time = { check_max( 4.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } - maxRetries = 1 + maxRetries = 5 maxErrors = '-1' // Process-specific resource requirements diff --git a/conf/modules.config b/conf/modules.config index cb326e7..acc63f5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -20,15 +20,73 @@ process { ] } + if( params.align ) { + + withName: '.*:ALIGN_PACBIO:FILTER_PACBIO:SAMTOOLS_CONVERT' { + ext.args = "-e '[rq]>=0.99' -x fi -x fp -x ri -x rp --write-index --output-fmt bam" + } + + withName: '.*:ALIGN_PACBIO:FILTER_PACBIO:SAMTOOLS_COLLATE' { + ext.prefix = { "${meta.id}.collate" } + } + + withName: '.*:ALIGN_PACBIO:FILTER_PACBIO:BLAST_BLASTN' { + ext.args = '-task blastn -reward 1 -penalty -5 -gapopen 3 -gapextend 3 -dust yes -soft_masking true -evalue .01 -searchsp 1750000000000 -outfmt 6' + } + + withName: '.*:ALIGN_PACBIO:FILTER_PACBIO:SAMTOOLS_FILTER' { + ext.prefix = { "${meta.id}.filter" } + } + + withName: '.*:ALIGN_PACBIO:FILTER_PACBIO:SAMTOOLS_FASTQ' { + ext.args = '-F 0x200 -nt' + } + + withName: '.*:.*:ALIGN_PACBIO:MINIMAP2_ALIGN' { + ext.args = { "-ax map-hifi --cs=short -R ${meta.read_group}" } + } + + withName: '.*:.*:ALIGN_PACBIO:SAMTOOLS_MERGE' { + ext.args = { "-c -p" } + ext.prefix = { "${meta.id}.merge" } + } + + withName: '.*:CONVERT_STATS:SAMTOOLS_VIEW' { + ext.prefix = { "${meta2.id}.${meta.datatype}.${meta.id}" } + ext.args = '--output-fmt cram --write-index' + } + + withName: '.*:CONVERT_STATS:SAMTOOLS_STATS' { + ext.prefix = { "${input.baseName}" } + } + + withName: '.*:CONVERT_STATS:SAMTOOLS_FLAGSTAT' { + ext.prefix = { "${bam.baseName}" } + } + + withName: '.*:CONVERT_STATS:SAMTOOLS_IDXSTATS' { + ext.prefix = { "${bam.baseName}" } + } + + withName: '.*:ALIGN_PACBIO:CONVERT_STATS:.*' { + publishDir = [ + path: { "${params.outdir}/variant_calling" }, + mode: params.publish_dir_mode, + saveAs: { filename -> 
filename.equals('versions.yml') ? null : filename } + ] + } + } else{ + + withName: '.*:INPUT_MERGE:SAMTOOLS_MERGE' { + ext.args = '--write-index' + } + } + withName: '.*:INPUT_FILTER_SPLIT:SAMTOOLS_VIEW' { ext.args = '--output-fmt cram --write-index -F 0x900' ext.prefix = { "${meta.id}_filtered" } } - withName: '.*:INPUT_MERGE:SAMTOOLS_MERGE' { - ext.args = '--write-index' - } - withName: '.*:DEEPVARIANT_CALLER:DEEPVARIANT' { ext.args = '--model_type=PACBIO' } diff --git a/conf/test_align.config b/conf/test_align.config new file mode 100644 index 0000000..8da6b65 --- /dev/null +++ b/conf/test_align.config @@ -0,0 +1,27 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run sanger-tol/variantcalling -profile test_align,<docker/singularity> --outdir <OUTDIR> --align + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile with alignment' + config_profile_description = 'Minimal unaligned test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = "${projectDir}/assets/samplesheet_test_align.csv" + + // Fasta references + fasta = "https://tolit.cog.sanger.ac.uk/test-data/Cantharis_rufa/assembly/GCA_947369205.1_OX376310.1_CANBKR010000003.1.fasta.gz" +} diff --git a/conf/test_full_align.config b/conf/test_full_align.config new file mode 100644 index 0000000..79b9fd7 --- /dev/null +++ b/conf/test_full_align.config @@ -0,0 +1,25 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests with alignment 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. + + Use as follows: + nextflow run sanger-tol/variantcalling -profile test_full_align,<docker/singularity> --outdir <OUTDIR> --align + +---------------------------------------------------------------------------------------- +*/ + +cleanup = true + +params { + config_profile_name = 'Full test profile with alignment' + config_profile_description = 'Full non-aligned test dataset to check pipeline function' + + // Input data for full size test + input = "${projectDir}/assets/samplesheet_test_full_align.csv" + + // Fasta references + fasta = "/lustre/scratch124/tol/projects/darwin/data/insects/Polyommatus_icarus/assembly/release/ilPolIcar1.1/insdc/GCA_937595015.1.fasta.gz" + +} diff --git a/modules.json b/modules.json index 1ab14cf..f5561b1 100644 --- a/modules.json +++ b/modules.json @@ -10,6 +10,11 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, + "blast/blastn": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, "cat/cat": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", @@ -25,11 +30,46 @@ "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", "installed_by": ["modules"] }, + "gunzip": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "minimap2/align": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "samtools/collate": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, "samtools/faidx": { "branch": "master", "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", "installed_by": ["modules"] }, + "samtools/fasta": { + "branch": "master", + "git_sha": "6f4299292ef2c5b66e6829527b2647c301b77cc9", + 
"installed_by": ["modules"] + }, + "samtools/fastq": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "samtools/flagstat": { + "branch": "master", + "git_sha": "63e817de8c617131447192ab2c4e70b4ed4071f7", + "installed_by": ["modules"] + }, + "samtools/idxstats": { + "branch": "master", + "git_sha": "63e817de8c617131447192ab2c4e70b4ed4071f7", + "installed_by": ["modules"] + }, "samtools/merge": { "branch": "master", "git_sha": "e7ce60acc8a33fa17429e966364657a63016e870", @@ -41,14 +81,25 @@ "git_sha": "a0f7be95788366c1923171e358da7d049eb440f9", "installed_by": ["modules"] }, + "samtools/stats": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, "samtools/view": { "branch": "master", - "git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"], + "patch": "modules/nf-core/samtools/view/samtools-view.diff" + }, + "untar": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "vcftools": { "branch": "master", - "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", + "git_sha": "485558b40040fc3ace093d9084210125d8ba4c97", "installed_by": ["modules"], "patch": "modules/nf-core/vcftools/vcftools.diff" } diff --git a/modules/local/pacbio_filter.nf b/modules/local/pacbio_filter.nf new file mode 100644 index 0000000..18dd11c --- /dev/null +++ b/modules/local/pacbio_filter.nf @@ -0,0 +1,30 @@ +process PACBIO_FILTER { + tag "$meta.id" + label 'process_single' + + conda "conda-forge::gawk=5.1.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : + 'quay.io/biocontainers/gawk:5.1.0' }" + + input: + tuple val(meta), path(txt) + + output: + path("*.blocklist"), emit: list + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + pacbio_filter.sh $txt ${prefix}.blocklist + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + GNU Awk: \$(echo \$(awk --version 2>&1) | grep -i awk | sed 's/GNU Awk //; s/,.*//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/blast/blastn/environment.yml b/modules/nf-core/blast/blastn/environment.yml new file mode 100644 index 0000000..cb9b15d --- /dev/null +++ b/modules/nf-core/blast/blastn/environment.yml @@ -0,0 +1,7 @@ +name: blast_blastn +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::blast=2.14.1 diff --git a/modules/nf-core/blast/blastn/main.nf b/modules/nf-core/blast/blastn/main.nf new file mode 100644 index 0000000..e8b96ad --- /dev/null +++ b/modules/nf-core/blast/blastn/main.nf @@ -0,0 +1,57 @@ +process BLAST_BLASTN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/blast:2.14.1--pl5321h6f7f691_0': + 'biocontainers/blast:2.14.1--pl5321h6f7f691_0' }" + + input: + tuple val(meta) , path(fasta) + tuple val(meta2), path(db) + + output: + tuple val(meta), path('*.txt'), emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = fasta.getExtension() == "gz" ? true : false + def fasta_name = is_compressed ? 
fasta.getBaseName() : fasta + + """ + if [ "${is_compressed}" == "true" ]; then + gzip -c -d ${fasta} > ${fasta_name} + fi + + DB=`find -L ./ -name "*.nin" | sed 's/\\.nin\$//'` + blastn \\ + -num_threads ${task.cpus} \\ + -db \$DB \\ + -query ${fasta_name} \\ + ${args} \\ + -out ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + blast: \$(blastn -version 2>&1 | sed 's/^.*blastn: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + blast: \$(blastn -version 2>&1 | sed 's/^.*blastn: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/blast/blastn/meta.yml b/modules/nf-core/blast/blastn/meta.yml new file mode 100644 index 0000000..a0d64dd --- /dev/null +++ b/modules/nf-core/blast/blastn/meta.yml @@ -0,0 +1,55 @@ +name: blast_blastn +description: Queries a BLAST DNA database +keywords: + - fasta + - blast + - blastn + - DNA sequence +tools: + - blast: + description: | + BLAST finds regions of similarity between biological sequences. + homepage: https://blast.ncbi.nlm.nih.gov/Blast.cgi + documentation: https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs + doi: 10.1016/S0022-2836(05)80360-2 + licence: ["US-Government-Work"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input fasta file containing queries sequences + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + - meta2: + type: map + description: | + Groovy Map containing db information + e.g. [ id:'test2', single_end:false ] + - db: + type: directory + description: Directory containing the blast database + pattern: "*" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - txt: + type: file + description: File containing blastn hits + pattern: "*.txt" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@vagkaratzas" diff --git a/modules/nf-core/blast/blastn/tests/main.nf.test b/modules/nf-core/blast/blastn/tests/main.nf.test new file mode 100644 index 0000000..1058c81 --- /dev/null +++ b/modules/nf-core/blast/blastn/tests/main.nf.test @@ -0,0 +1,71 @@ +nextflow_process { + + name "Test Process BLAST_BLASTN" + script "../main.nf" + process "BLAST_BLASTN" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "blast" + tag "blast/blastn" + + setup { + run("BLAST_MAKEBLASTDB") { + script "../../makeblastdb/main.nf" + process { + """ + input[0] = [ [id:'test2'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + """ + } + } + } + + test("Should search for nucleotide hits against a blast db") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + input[1] = BLAST_MAKEBLASTDB.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.txt.get(0).get(1)).getText().contains("Query= MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate") }, + { assert process.out.versions } + ) + } + + } + + test("Should search for zipped nucleotide hits against a blast db") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta_gz'], checkIfExists: true) ] + input[1] = BLAST_MAKEBLASTDB.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.txt.get(0).get(1)).getText().contains("Query= 
MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate") }, + { assert process.out.versions } + ) + } + + } + +} diff --git a/modules/nf-core/blast/blastn/tests/nextflow.config b/modules/nf-core/blast/blastn/tests/nextflow.config new file mode 100644 index 0000000..0899289 --- /dev/null +++ b/modules/nf-core/blast/blastn/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: BLAST_MAKEBLASTDB { + ext.args = '-dbtype nucl' + } +} diff --git a/modules/nf-core/blast/blastn/tests/tags.yml b/modules/nf-core/blast/blastn/tests/tags.yml new file mode 100644 index 0000000..b4588ab --- /dev/null +++ b/modules/nf-core/blast/blastn/tests/tags.yml @@ -0,0 +1,2 @@ +blast/blastn: + - modules/nf-core/blast/blastn/** diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml new file mode 100644 index 0000000..25910b3 --- /dev/null +++ b/modules/nf-core/gunzip/environment.yml @@ -0,0 +1,7 @@ +name: gunzip +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::sed=4.7 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf new file mode 100644 index 0000000..468a6f2 --- /dev/null +++ b/modules/nf-core/gunzip/main.nf @@ -0,0 +1,48 @@ +process GUNZIP { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$gunzip"), emit: gunzip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + gunzip = archive.toString() - '.gz' + """ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ + $args \\ + $archive \\ + > $gunzip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + gunzip = archive.toString() - '.gz' + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml new file mode 100644 index 0000000..231034f --- /dev/null +++ b/modules/nf-core/gunzip/meta.yml @@ -0,0 +1,39 @@ +name: gunzip +description: Compresses and decompresses files. +keywords: + - gunzip + - compression + - decompression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. + documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. 
[ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" +output: + - gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test new file mode 100644 index 0000000..d031792 --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process GUNZIP" + script "../main.nf" + process "GUNZIP" + tag "gunzip" + tag "modules_nfcore" + tag "modules" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [], + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap new file mode 100644 index 0000000..720fd9f --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "timestamp": "2023-10-17T15:35:37.690477896" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml new file mode 100644 index 
0000000..fd3f691 --- /dev/null +++ b/modules/nf-core/gunzip/tests/tags.yml @@ -0,0 +1,2 @@ +gunzip: + - modules/nf-core/gunzip/** diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml new file mode 100644 index 0000000..60b9a8b --- /dev/null +++ b/modules/nf-core/minimap2/align/environment.yml @@ -0,0 +1,8 @@ +name: minimap2_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::minimap2=2.24 + - bioconda::samtools=1.14 diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf new file mode 100644 index 0000000..fa3ae50 --- /dev/null +++ b/modules/nf-core/minimap2/align/main.nf @@ -0,0 +1,48 @@ +process MINIMAP2_ALIGN { + tag "$meta.id" + label 'process_medium' + + // Note: the versions here need to match the versions used in the mulled container below and minimap2/index + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' : + 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(reference) + val bam_format + val cigar_paf_format + val cigar_bam + + output: + tuple val(meta), path("*.paf"), optional: true, emit: paf + tuple val(meta), path("*.bam"), optional: true, emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def bam_output = bam_format ? "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf" + def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' + def set_cigar_bam = cigar_bam && bam_format ? 
"-L" : '' + """ + minimap2 \\ + $args \\ + -t $task.cpus \\ + "${reference ?: reads}" \\ + "$reads" \\ + $cigar_paf \\ + $set_cigar_bam \\ + $bam_output + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml new file mode 100644 index 0000000..408522d --- /dev/null +++ b/modules/nf-core/minimap2/align/meta.yml @@ -0,0 +1,75 @@ +name: minimap2_align +description: A versatile pairwise aligner for genomic and spliced nucleotide sequences +keywords: + - align + - fasta + - fastq + - genome + - paf + - reference +tools: + - minimap2: + description: | + A versatile pairwise aligner for genomic and spliced nucleotide sequences. + homepage: https://github.com/lh3/minimap2 + documentation: https://github.com/lh3/minimap2#uguide + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FASTA or FASTQ files of size 1 and 2 for single-end + and paired-end data, respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_ref'] + - reference: + type: file + description: | + Reference database in FASTA format. + - bam_format: + type: boolean + description: Specify that output should be in BAM format + - cigar_paf_format: + type: boolean + description: Specify that output CIGAR should be in PAF format + - cigar_bam: + type: boolean + description: | + Write CIGAR with >65535 ops at the CG tag. This is recommended when + doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - paf: + type: file + description: Alignment in PAF format + pattern: "*.paf" + - bam: + type: file + description: Alignment in BAM format + pattern: "*.bam" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@heuermh" + - "@sofstam" + - "@sateeshperi" + - "@jfy133" +maintainers: + - "@heuermh" + - "@sofstam" + - "@sateeshperi" + - "@jfy133" diff --git a/modules/nf-core/samtools/collate/environment.yml b/modules/nf-core/samtools/collate/environment.yml new file mode 100644 index 0000000..0fb861b --- /dev/null +++ b/modules/nf-core/samtools/collate/environment.yml @@ -0,0 +1,7 @@ +name: samtools_collate +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/collate/main.nf b/modules/nf-core/samtools/collate/main.nf new file mode 100644 index 0000000..38a4daf --- /dev/null +++ b/modules/nf-core/samtools/collate/main.nf @@ -0,0 +1,46 @@ +process SAMTOOLS_COLLATE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0': + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(input) + path fasta + + output: + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.sam"), emit: sam, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + def extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? 
"cram" : + "bam" + if ("$input" == "${prefix}.${extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + samtools \\ + collate \\ + $args \\ + ${reference} \\ + -@ $task.cpus \\ + -o ${prefix}.${extension} \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/collate/meta.yml b/modules/nf-core/samtools/collate/meta.yml new file mode 100644 index 0000000..3c79927 --- /dev/null +++ b/modules/nf-core/samtools/collate/meta.yml @@ -0,0 +1,43 @@ +name: "samtools_collate" +description: shuffles and groups reads together by their names +keywords: + - collate + - bam +tools: + - "samtools": + description: "Tools for dealing with SAM, BAM and CRAM files" + homepage: "http://www.htslib.org" + documentation: "https://www.htslib.org/doc/samtools-collate.html" + tool_dev_url: "https://github.com/samtools/samtools" + doi: "10.1093/bioinformatics/btp352" + licence: "['MIT']" +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - output: + type: file + description: Collated BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +authors: + - "@priyanka-surana" +maintainers: + - "@priyanka-surana" diff --git a/modules/nf-core/samtools/fasta/environment.yml b/modules/nf-core/samtools/fasta/environment.yml new file mode 100644 index 0000000..8a82f9e --- /dev/null +++ b/modules/nf-core/samtools/fasta/environment.yml @@ -0,0 +1,7 @@ +name: samtools_fasta +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/fasta/main.nf b/modules/nf-core/samtools/fasta/main.nf new file mode 100644 index 0000000..dc4ad98 --- /dev/null +++ b/modules/nf-core/samtools/fasta/main.nf @@ -0,0 +1,44 @@ +process SAMTOOLS_FASTA { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(input) + val(interleave) + + output: + tuple val(meta), path("*_{1,2}.fasta.gz") , optional:true, emit: fasta + tuple val(meta), path("*_interleaved.fasta.gz"), optional:true, emit: interleaved + tuple val(meta), path("*_singleton.fasta.gz") , optional:true, emit: singleton + tuple val(meta), path("*_other.fasta.gz") , optional:true, emit: other + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def output = ( interleave && ! meta.single_end ) ? "> ${prefix}_interleaved.fasta.gz" : + meta.single_end ? 
"-1 ${prefix}_1.fasta.gz -s ${prefix}_singleton.fasta.gz" : + "-1 ${prefix}_1.fasta.gz -2 ${prefix}_2.fasta.gz -s ${prefix}_singleton.fasta.gz" + """ + samtools \\ + fasta \\ + $args \\ + --threads ${task.cpus-1} \\ + -0 ${prefix}_other.fasta.gz \\ + $input \\ + $output + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/fasta/meta.yml b/modules/nf-core/samtools/fasta/meta.yml new file mode 100644 index 0000000..eae26f0 --- /dev/null +++ b/modules/nf-core/samtools/fasta/meta.yml @@ -0,0 +1,60 @@ +name: "samtools_fasta" +description: Converts a SAM/BAM/CRAM file to FASTA +keywords: + - bam + - sam + - cram + - fasta +tools: + - "samtools": + description: "Tools for dealing with SAM, BAM and CRAM files" + homepage: "http://www.htslib.org" + documentation: "https://www.htslib.org/doc/samtools-fasta.html" + tool_dev_url: "https://github.com/samtools/samtools" + doi: "10.1093/bioinformatics/btp352" + licence: ["MIT"] +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - interleave: + type: boolean + description: Set true for interleaved fasta files +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fasta: + type: file + description: Compressed FASTA file(s) with reads with either the READ1 or READ2 flag set in separate files. + pattern: "*_{1,2}.fasta.gz" + - interleaved: + type: file + description: Compressed FASTA file with reads with either the READ1 or READ2 flag set in a combined file. Needs collated input file. 
+ pattern: "*_interleaved.fasta.gz" + - singleton: + type: file + description: Compressed FASTA file with singleton reads + pattern: "*_singleton.fasta.gz" + - other: + type: file + description: Compressed FASTA file with reads with either both READ1 and READ2 flags set or unset + pattern: "*_other.fasta.gz" +authors: + - "@priyanka-surana" +maintainers: + - "@priyanka-surana" diff --git a/modules/nf-core/samtools/fastq/environment.yml b/modules/nf-core/samtools/fastq/environment.yml new file mode 100644 index 0000000..1b7124d --- /dev/null +++ b/modules/nf-core/samtools/fastq/environment.yml @@ -0,0 +1,7 @@ +name: samtools_fastq +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/fastq/main.nf b/modules/nf-core/samtools/fastq/main.nf new file mode 100644 index 0000000..ed8d755 --- /dev/null +++ b/modules/nf-core/samtools/fastq/main.nf @@ -0,0 +1,44 @@ +process SAMTOOLS_FASTQ { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(input) + val(interleave) + + output: + tuple val(meta), path("*_{1,2}.fastq.gz") , optional:true, emit: fastq + tuple val(meta), path("*_interleaved.fastq.gz"), optional:true, emit: interleaved + tuple val(meta), path("*_singleton.fastq.gz") , optional:true, emit: singleton + tuple val(meta), path("*_other.fastq.gz") , optional:true, emit: other + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def output = ( interleave && ! meta.single_end ) ? "> ${prefix}_interleaved.fastq.gz" : + meta.single_end ? 
"-1 ${prefix}_1.fastq.gz -s ${prefix}_singleton.fastq.gz" : + "-1 ${prefix}_1.fastq.gz -2 ${prefix}_2.fastq.gz -s ${prefix}_singleton.fastq.gz" + """ + samtools \\ + fastq \\ + $args \\ + --threads ${task.cpus-1} \\ + -0 ${prefix}_other.fastq.gz \\ + $input \\ + $output + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/fastq/meta.yml b/modules/nf-core/samtools/fastq/meta.yml new file mode 100644 index 0000000..c4002a4 --- /dev/null +++ b/modules/nf-core/samtools/fastq/meta.yml @@ -0,0 +1,62 @@ +name: samtools_fastq +description: Converts a SAM/BAM/CRAM file to FASTQ +keywords: + - bam + - sam + - cram + - fastq +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - interleave: + type: boolean + description: Set true for interleaved fastq file +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fastq: + type: file + description: Compressed FASTQ file(s) with reads with either the READ1 or READ2 flag set in separate files. 
+ pattern: "*_{1,2}.fastq.gz" + - interleaved: + type: file + description: Compressed FASTQ file with reads with either the READ1 or READ2 flag set in a combined file. Needs collated input file. + pattern: "*_interleaved.fastq.gz" + - singleton: + type: file + description: Compressed FASTQ file with singleton reads + pattern: "*_singleton.fastq.gz" + - other: + type: file + description: Compressed FASTQ file with reads with either both READ1 and READ2 flags set or unset + pattern: "*_other.fastq.gz" +authors: + - "@priyanka-surana" + - "@suzannejin" +maintainers: + - "@priyanka-surana" + - "@suzannejin" diff --git a/modules/nf-core/samtools/flagstat/environment.yml b/modules/nf-core/samtools/flagstat/environment.yml new file mode 100644 index 0000000..22bdb5c --- /dev/null +++ b/modules/nf-core/samtools/flagstat/environment.yml @@ -0,0 +1,7 @@ +name: samtools_flagstat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf new file mode 100644 index 0000000..9dee35a --- /dev/null +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -0,0 +1,46 @@ +process SAMTOOLS_FLAGSTAT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.flagstat"), emit: flagstat + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + samtools \\ + flagstat \\ + --threads ${task.cpus} \\ + $bam \\ + > ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml new file mode 100644 index 0000000..9799135 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -0,0 +1,51 @@ +name: samtools_flagstat +description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type +keywords: + - stats + - mapping + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test b/modules/nf-core/samtools/flagstat/tests/main.nf.test new file mode 100644 index 0000000..c618de7 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FLAGSTAT" + script "../main.nf" + process "SAMTOOLS_FLAGSTAT" + tag "modules" + tag "modules_nfcore" + tag "samtools/flagstat" + + test("BAM") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.flagstat).match() }, + { assert path(process.out.versions.get(0)).getText().contains("samtools") } + ) + } + } +} diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap new file mode 100644 index 0000000..880019f --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap @@ -0,0 +1,16 @@ +{ + "BAM": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ] + ], + "timestamp": "2023-11-14T15:49:22.577133" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/flagstat/tests/tags.yml b/modules/nf-core/samtools/flagstat/tests/tags.yml new file mode 100644 index 0000000..2d2b725 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/flagstat: + - modules/nf-core/samtools/flagstat/** diff --git a/modules/nf-core/samtools/idxstats/environment.yml b/modules/nf-core/samtools/idxstats/environment.yml new file mode 100644 index 0000000..89bd272 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/environment.yml @@ -0,0 +1,7 @@ +name: samtools_idxstats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf new file mode 100644 index 0000000..b22d084 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -0,0 +1,48 @@ +process SAMTOOLS_IDXSTATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.idxstats"), emit: idxstats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + samtools \\ + idxstats \\ + --threads ${task.cpus-1} \\ + $bam \\ + > ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml new file mode 100644 index 0000000..344e92a --- /dev/null +++ b/modules/nf-core/samtools/idxstats/meta.yml @@ -0,0 +1,52 @@ +name: samtools_idxstats +description: Reports alignment summary statistics for a BAM/CRAM/SAM file +keywords: + - stats + - mapping + - counts + - chromosome + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test b/modules/nf-core/samtools/idxstats/tests/main.nf.test new file mode 100644 index 0000000..0174a9e --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process SAMTOOLS_IDXSTATS" + script "../main.nf" + process "SAMTOOLS_IDXSTATS" + tag "modules" + tag "modules_nfcore" + tag "samtools/idxstats" + + test("BAM") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.idxstats).match() }, + { assert path(process.out.versions.get(0)).getText().contains("samtools") } + ) + } + } +} diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap new file mode 100644 index 0000000..4c6c12b --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap @@ -0,0 +1,16 @@ +{ + "BAM": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ] + ], + "timestamp": "2023-11-14T15:52:19.875194" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/idxstats/tests/tags.yml b/modules/nf-core/samtools/idxstats/tests/tags.yml new file mode 100644 index 0000000..d3057c6 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/idxstats: + - modules/nf-core/samtools/idxstats/** diff --git a/modules/nf-core/samtools/stats/environment.yml b/modules/nf-core/samtools/stats/environment.yml new file mode 100644 index 0000000..ed4e896 --- /dev/null +++ b/modules/nf-core/samtools/stats/environment.yml @@ -0,0 +1,7 @@ +name: samtools_stats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf new file mode 100644 index 0000000..07286ef --- /dev/null +++ b/modules/nf-core/samtools/stats/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(input), path(input_index) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.stats"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? 
"--reference ${fasta}" : "" + """ + samtools \\ + stats \\ + --threads ${task.cpus} \\ + ${reference} \\ + ${input} \\ + > ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml new file mode 100644 index 0000000..735ff81 --- /dev/null +++ b/modules/nf-core/samtools/stats/meta.yml @@ -0,0 +1,63 @@ +name: samtools_stats +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test b/modules/nf-core/samtools/stats/tests/main.nf.test new file mode 100644 index 0000000..e037132 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test @@ -0,0 +1,78 @@ +nextflow_process { + + name "Test Process SAMTOOLS_STATS" + script "../main.nf" + process "SAMTOOLS_STATS" + tag "modules" + tag "modules/nf-core" + tag "samtools" + tag "samtools/stats" + + test("SAMTOOLS STATS Should run without failures") { + + when { + params { + + outdir = "$outputDir" + } + process { + """ + // define inputs of the process here. 
+ input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + + ] + input[1] = [[],[]] + """ + + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + + } + + test("SAMTOOLS CRAM Should run without failures") { + + when { + params { + + outdir = "$outputDir" + } + process { + """ + // define inputs of the process here + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true) + + ] + input[1] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + + + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + + } + + +} diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/modules/nf-core/samtools/stats/tests/main.nf.test.snap new file mode 100644 index 0000000..516b2b0 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test.snap @@ -0,0 +1,64 @@ +{ + "SAMTOOLS STATS Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,6e768486d5df0257351c5419a79f9c9b" + ] + ], + "1": [ + "versions.yml:md5,08035f3409d934d47a416150884bb0df" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,6e768486d5df0257351c5419a79f9c9b" + ] + ], + "versions": [ + "versions.yml:md5,08035f3409d934d47a416150884bb0df" + ] + } + ], + "timestamp": "2023-10-18T12:12:42.998746" + }, + "SAMTOOLS CRAM Should run without failures": { + "content": [ + { + 
"0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,7c9ee5747793cceb9d6f4d733345641a" + ] + ], + "1": [ + "versions.yml:md5,08035f3409d934d47a416150884bb0df" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,7c9ee5747793cceb9d6f4d733345641a" + ] + ], + "versions": [ + "versions.yml:md5,08035f3409d934d47a416150884bb0df" + ] + } + ], + "timestamp": "2023-10-18T12:13:30.747222" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/stats/tests/tags.yml b/modules/nf-core/samtools/stats/tests/tags.yml new file mode 100644 index 0000000..7c28e30 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/stats: + - modules/nf-core/samtools/stats/** diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml new file mode 100644 index 0000000..141e7bd --- /dev/null +++ b/modules/nf-core/samtools/view/environment.yml @@ -0,0 +1,7 @@ +name: samtools_view +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf index cb91fac..613c6e7 100644 --- a/modules/nf-core/samtools/view/main.nf +++ b/modules/nf-core/samtools/view/main.nf @@ -2,7 +2,7 @@ process SAMTOOLS_VIEW { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : 'biocontainers/samtools:1.17--h00cdaf9_0' }" @@ -19,6 +19,7 @@ process SAMTOOLS_VIEW { tuple val(meta), path("*.bai"), emit: bai, optional: true tuple val(meta), path("*.csi"), emit: csi, optional: true tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.unoutput"), emit: unoutput, optional: true path "versions.yml", emit: versions when: @@ -29,7 +30,7 @@ process SAMTOOLS_VIEW { def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--reference ${fasta}" : "" - def readnames = qname ? "--qname-file ${qname}": "" + def readnames = qname ? "--qname-file ${qname} --unoutput ${prefix}.unoutput": "" def file_type = args.contains("--output-fmt sam") ? "sam" : args.contains("--output-fmt bam") ? "bam" : args.contains("--output-fmt cram") ? "cram" : diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml index 3b05450..3dadafa 100644 --- a/modules/nf-core/samtools/view/meta.yml +++ b/modules/nf-core/samtools/view/meta.yml @@ -82,3 +82,8 @@ authors: - "@joseespinosa" - "@FriederikeHanssen" - "@priyanka-surana" +maintainers: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/modules/nf-core/samtools/view/samtools-view.diff b/modules/nf-core/samtools/view/samtools-view.diff new file mode 100644 index 0000000..1fa860a --- /dev/null +++ b/modules/nf-core/samtools/view/samtools-view.diff @@ -0,0 +1,22 @@ +Changes in module 'nf-core/samtools/view' +--- modules/nf-core/samtools/view/main.nf ++++ modules/nf-core/samtools/view/main.nf +@@ -19,6 +19,7 @@ + tuple val(meta), path("*.bai"), emit: bai, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true ++ tuple val(meta), path("*.unoutput"), emit: unoutput, optional: true + path "versions.yml", emit: versions + + 
when: +@@ -29,7 +30,7 @@ + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" +- def readnames = qname ? "--qname-file ${qname}": "" ++ def readnames = qname ? "--qname-file ${qname} --unoutput ${prefix}.unoutput": "" + def file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + +************************************************************ diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 0000000..d6917da --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,9 @@ +name: untar +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::sed=4.7 + - conda-forge::grep=3.11 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf new file mode 100644 index 0000000..8a75bb9 --- /dev/null +++ b/modules/nf-core/untar/main.nf @@ -0,0 +1,63 @@ +process UNTAR { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$prefix"), emit: untar + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: ( meta.id ? 
"${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir $prefix + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C $prefix --strip-components 1 \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C $prefix \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir $prefix + touch ${prefix}/file.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml new file mode 100644 index 0000000..a9a2110 --- /dev/null +++ b/modules/nf-core/untar/meta.yml @@ -0,0 +1,46 @@ +name: untar +description: Extract files. +keywords: + - untar + - uncompress + - extract +tools: + - untar: + description: | + Extract tar.gz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - untar: + type: directory + description: Directory containing contents of archive + pattern: "*/" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test new file mode 100644 index 0000000..d40db13 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test @@ -0,0 +1,77 @@ +nextflow_process { + + name "Test Process UNTAR" + script "../main.nf" + process "UNTAR" + + tag "modules" + tag "modules_nfcore" + tag "untar" + + test("test_untar") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [], file(params.test_data['sarscov2']['genome']['kraken2_tar_gz'], checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.untar).match("test_untar") }, + ) + } + + } + + test("test_untar_different_output_path") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [], file(params.test_data['homo_sapiens']['illumina']['test_flowcell'], checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.untar).match("test_untar_different_output_path") }, + ) + } + + } + + test("test_untar_onlyfiles") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [], file(params.test_data['generic']['tar']['tar_gz'], checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.untar).match("test_untar_onlyfiles") }, + ) + } + + } + +} diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap new file mode 100644 index 0000000..146c867 
--- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -0,0 +1,513 @@ +{ + "test_untar_different_output_path": { + "content": [ + [ + [ + [ + + ], + [ + [ + [ + [ + [ + [ + "s_1_1101.bcl:md5,ad01889e2ff43e2f194224e20bdb600c", + "s_1_1101.stats:md5,4bbbf103454b37fbc3138fadf1b4446b" + ], + [ + "s_1_1101.bcl:md5,565384bbe67a694dfd690bae6d1d30c2", + "s_1_1101.stats:md5,55e5abd8f129ff38ef169873547abdb8" + ], + [ + "s_1_1101.bcl:md5,650fa58a630a9148835ba79e323d4237", + "s_1_1101.stats:md5,77403669ca1b05340c390dff64425c1e" + ], + [ + "s_1_1101.bcl:md5,54471c9e97299cd141e202e204637702", + "s_1_1101.stats:md5,67b14c9a89b7f8556674a7524d5cfb2d" + ], + [ + "s_1_1101.bcl:md5,74e4f929fc7476c380fd9d741ddb6700", + "s_1_1101.stats:md5,5730a4c35463eaa12a06b6758710b98c" + ], + [ + "s_1_1101.bcl:md5,c785f472f4350c120c02c888c8189590", + "s_1_1101.stats:md5,fee4ec63895ea81007e06ee6a36ba5e0" + ], + [ + "s_1_1101.bcl:md5,b7ea50bb25f08d43c301741d77050a9b", + "s_1_1101.stats:md5,fa7c68f3122c74d14364e6f7b011af70" + ], + [ + "s_1_1101.bcl:md5,9d5087dc4bcae39d66486363d4f68ecf", + "s_1_1101.stats:md5,23cdceee4d82c4b8e7c60018b9276ace" + ], + [ + "s_1_1101.bcl:md5,581e0c5ee94e8f2de14b2b1d8e777530", + "s_1_1101.stats:md5,9a3536d573c97f66bb56b49463612607" + ], + [ + "s_1_1101.bcl:md5,296fc026bb34c67bbe2b44845fe0d1de", + "s_1_1101.stats:md5,a7f57a7770fb9c5ae2a0fb1ef403ec4f" + ], + [ + "s_1_1101.bcl:md5,2a3ca15531556c36d10d132a9e051de8", + "s_1_1101.stats:md5,2d0bcdb0a1b51d3d79e415db2ab2d3b1" + ], + [ + "s_1_1101.bcl:md5,1150d46a2ccd4ac58aee0585d3e4ffd7", + "s_1_1101.stats:md5,2e97550bd5b5864ffd0565bb7a3f6d40" + ], + [ + "s_1_1101.bcl:md5,0b85c4b3da0de95e7b862d849c5333ae", + "s_1_1101.stats:md5,6eab9746fbeb783b0cd70398f44e0c1a" + ], + [ + "s_1_1101.bcl:md5,e0e9c91f4698804d7a6d1058ef68b34f", + "s_1_1101.stats:md5,790022cdc7878a02b2ebd166e1ddf0a7" + ], + [ + "s_1_1101.bcl:md5,38cd0ad4de359e651c8ac0d5777ea625", + "s_1_1101.stats:md5,a1b1d5ea5371d326abb029774483c5e6" + ], + [ + 
"s_1_1101.bcl:md5,b0ddc05c4012ccba24e712a1cfec748f", + "s_1_1101.stats:md5,af3d232f839d720f76f40ba06caa2987" + ], + [ + "s_1_1101.bcl:md5,af32fcc5dc3b836cf7a5ba3db85a75dd", + "s_1_1101.stats:md5,f93f2c09bd4e486c74a5f6e2040f7296" + ], + [ + "s_1_1101.bcl:md5,54b7428e037ca87816107647d4a3d9db", + "s_1_1101.stats:md5,e5ac77a72cd7bed5e9bf03cccda0e48c" + ], + [ + "s_1_1101.bcl:md5,fc8b4eacd493bf3d0b20bc23998dc7ff", + "s_1_1101.stats:md5,190315e159e2f4bc4c057ded7470dc52" + ], + [ + "s_1_1101.bcl:md5,9484ecffda489927fce424ac6a44fa9d", + "s_1_1101.stats:md5,0825feeb457ecc9efcf6f8526ba32311" + ], + [ + "s_1_1101.bcl:md5,eec59e21036e31c95ce1e847bfb0a9c4", + "s_1_1101.stats:md5,9acc13f63c98e5a8445e7be70d49222b" + ], + [ + "s_1_1101.bcl:md5,a9fb24476f87cba4fba68e2b3c3f2c07", + "s_1_1101.stats:md5,dc0aa7db9790733291c3e6480ca2a0fc" + ], + [ + "s_1_1101.bcl:md5,ed950b3e82c500927c2e236c9df005c6", + "s_1_1101.stats:md5,dccb71ec47d1f9d33a192da6d5660a45" + ], + [ + "s_1_1101.bcl:md5,b3e992025e995ca56b5ea2820144ef47", + "s_1_1101.stats:md5,a6a829bf2cffb26ac5d9dc3012057699" + ], + [ + "s_1_1101.bcl:md5,89edc726a5a4e0b4ff8ca3899ed0232b", + "s_1_1101.stats:md5,5b9b4fd8110577a59b82d0c419519d29" + ], + [ + "s_1_1101.bcl:md5,4dc696149169f232c451225f563cb5cd", + "s_1_1101.stats:md5,d3514a71ea3adc60e2943c6b8f6e2598" + ], + [ + "s_1_1101.bcl:md5,35b992d0318afb7c825ceaa31b0755e6", + "s_1_1101.stats:md5,2826093acc175c16c3795de7c4ca8f07" + ], + [ + "s_1_1101.bcl:md5,7bc927f56a362e49c00b5d76ee048901", + "s_1_1101.stats:md5,e47d862b795fd6b88a31d7d482ab22f6" + ], + [ + "s_1_1101.bcl:md5,84742233ff2a651626fe9036f27f7cb2", + "s_1_1101.stats:md5,b78fad11d3c50bc76b722cdc03e3028b" + ], + [ + "s_1_1101.bcl:md5,3935341c86263a7938e8c49620ef39f8", + "s_1_1101.stats:md5,cc6585b2daac5354073d150874da9704" + ], + [ + "s_1_1101.bcl:md5,3627f4fd548bf6e64aaf08fba3a342be", + "s_1_1101.stats:md5,120ae4831ae004ff7d16728aef36e82f" + ], + [ + "s_1_1101.bcl:md5,07631014bc35124149fabd80ef19f933", + 
"s_1_1101.stats:md5,eadd63d91f47cc6db6b6f0a967a23927" + ], + [ + "s_1_1101.bcl:md5,a1149c80415dc2f34d768eeb397c43fb", + "s_1_1101.stats:md5,ca89a9def67611a9151c6ce685b7cce1" + ], + [ + "s_1_1101.bcl:md5,eb5f71d4741d2f40618756bc72eaf8b4", + "s_1_1101.stats:md5,90f48501e735e5915b843478e23d1ae2" + ], + [ + "s_1_1101.bcl:md5,9bf270fe3f6add1a591ebc24fff10078", + "s_1_1101.stats:md5,a4e429671d4098034293c638aa655e16" + ], + [ + "s_1_1101.bcl:md5,219bedcbd24bae54fe4cf05dae05282c", + "s_1_1101.stats:md5,dd97525b65b68207137d51fcf19132c7" + ], + [ + "s_1_1101.bcl:md5,5163bc00a68fd57ae50cae0b76350892", + "s_1_1101.stats:md5,b606a5368eff1f012f3ea5d11ccdf2e0" + ], + [ + "s_1_1101.bcl:md5,fc429195a5af59a59e0cc4c48e6c05ea", + "s_1_1101.stats:md5,d809aa19698053f90d639da4dcad8008" + ], + [ + "s_1_1101.bcl:md5,383340219a1dd77076a092a64a71a7e4", + "s_1_1101.stats:md5,b204a5cf256378679ffc906c15cc1bae" + ], + [ + "s_1_1101.bcl:md5,0c369540d3e24696cf1f9c55bab69315", + "s_1_1101.stats:md5,a2bc69a4031a22ce9621dcc623a0bf4b" + ], + [ + "s_1_1101.bcl:md5,3127abc8016ba8eb954f8f8015dff387", + "s_1_1101.stats:md5,5deafff31150b7bf757f814e49a53bc2" + ], + [ + "s_1_1101.bcl:md5,045f40c82de676bafec3d59f91376a7a", + "s_1_1101.stats:md5,890700edc20687c090ef52248c7884b1" + ], + [ + "s_1_1101.bcl:md5,78af269aa2b39a1d765703f0a4739a86", + "s_1_1101.stats:md5,303cf457aa1543a8208544f694cbc531" + ], + [ + "s_1_1101.bcl:md5,0ab8c781959b783b62888e9274364a46", + "s_1_1101.stats:md5,2605b0e8322f83aa4d0dae5da4ec7a7a" + ], + [ + "s_1_1101.bcl:md5,d0cf823ffe352e8b3f75d589544ab617", + "s_1_1101.stats:md5,efa3c0e01e3db71e12fd961cb2d03739" + ], + [ + "s_1_1101.bcl:md5,db4ca4ab7a01e03c246f9160c3758d82", + "s_1_1101.stats:md5,f61550d9e4a90df6b860e68f41f82f60" + ], + [ + "s_1_1101.bcl:md5,1af39a2c7e5ff20ece91cb8160b51d17", + "s_1_1101.stats:md5,d0e20879afcaf6dfcd88c73f1c5c78cf" + ], + [ + "s_1_1101.bcl:md5,4cf7123bb0fffcd79266df03aef01665", + "s_1_1101.stats:md5,29bff4075109a121b087116b58d7e927" + ], + [ + 
"s_1_1101.bcl:md5,aa9980428cb60cd6320f4b48f4dd0d74", + "s_1_1101.stats:md5,6b0e20bde93133117a8d1a6df3d6f37b" + ], + [ + "s_1_1101.bcl:md5,0f6e440374e15b9b491d52fb83a8adfe", + "s_1_1101.stats:md5,55cb5eb0ecdabd23dca39ab8c4607598" + ], + [ + "s_1_1101.bcl:md5,2c645d7bdaddaa403f6e304d36df9e4b", + "s_1_1101.stats:md5,53acf33d21f832779b400c2447386ce4" + ], + [ + "s_1_1101.bcl:md5,3bbf0863b423b770c879203644420206", + "s_1_1101.stats:md5,579bdc7293cac8c3d7407249cacf4c25" + ], + [ + "s_1_1101.bcl:md5,6658a08409e81d29cfeb2d096b491985", + "s_1_1101.stats:md5,bb559ffbea46d612f9933cefa84c4c03" + ], + [ + "s_1_1101.bcl:md5,1700d9a13d3d4f7643af2943ef838acb", + "s_1_1101.stats:md5,f01cb6050ebfb15da1e0399ebd791eb4" + ], + [ + "s_1_1101.bcl:md5,1ac7aa9ffae25eb103f755f33e4a39c6", + "s_1_1101.stats:md5,0b9d45d7929ccf336d5e5b95373ed3c2" + ], + [ + "s_1_1101.bcl:md5,812a97af2e983a53226e18c75190b06c", + "s_1_1101.stats:md5,d2410c7b0e506dab2972e77e2398de1e" + ], + [ + "s_1_1101.bcl:md5,c981e8e4dcc434956c2b86159da268bc", + "s_1_1101.stats:md5,e9c826e85361ce673f1f248786c9a611" + ], + [ + "s_1_1101.bcl:md5,88e09e99a0a4ef3357b203a41b22f77c", + "s_1_1101.stats:md5,ef06f2e5ad667bbd383f9ed6a05b7b42" + ], + [ + "s_1_1101.bcl:md5,461c8b146fc8a7938be38689978ecd09", + "s_1_1101.stats:md5,65115693935da66f9791b27136e22fb0" + ], + [ + "s_1_1101.bcl:md5,c7b827df5ce20e0f21916fe60860ca3f", + "s_1_1101.stats:md5,87be73613aeb507847f94d3cac5bb30a" + ], + [ + "s_1_1101.bcl:md5,7c4cc3dc9c8a1b0f15917b282dfb40ce", + "s_1_1101.stats:md5,bdd9181fa89debbfafe7b6ea3e064065" + ], + [ + "s_1_1101.bcl:md5,19f4debaf91e118aca8934517179ac33", + "s_1_1101.stats:md5,1143082719e136241d21b14a6b19b8a2" + ], + [ + "s_1_1101.bcl:md5,38aa256ad2d697d84b0b2c0e876a3eba", + "s_1_1101.stats:md5,64dd82f03df23f7f437eede2671ed4fe" + ], + [ + "s_1_1101.bcl:md5,b7929970378949571fed922c1b8cab32", + "s_1_1101.stats:md5,3d6d7985a41629fe196e4342d7fe36aa" + ], + [ + "s_1_1101.bcl:md5,fb2ed0bf6e89d79624ee78754e773491", + 
"s_1_1101.stats:md5,f34940810ff255aee79953496a12716d" + ], + [ + "s_1_1101.bcl:md5,4f8a8311f5f9c3a7629c1a973a7b280e", + "s_1_1101.stats:md5,4fd7cd28c09f4e152e7c2ad1ab541cd2" + ], + [ + "s_1_1101.bcl:md5,9eb46c903d0344e25af51f88cc311d60", + "s_1_1101.stats:md5,df3abd5f620d9e7f99496098d9fd3f7f" + ], + [ + "s_1_1101.bcl:md5,3ecbc17f3660e2014b58d7fe70ae62d5", + "s_1_1101.stats:md5,8e89a13c85a6d6ab3ccd251b66d1f165" + ], + [ + "s_1_1101.bcl:md5,5d59cc2499a77791233a64f73fe82894", + "s_1_1101.stats:md5,32ec99cd400f4b80cb26e2fa8e07ece0" + ], + [ + "s_1_1101.bcl:md5,1c052da47b9ae8554388f0fa3aade482", + "s_1_1101.stats:md5,d23f438772673688aa7bc92421dc6dce" + ], + [ + "s_1_1101.bcl:md5,1a52bd4f23130c0c96bc967ccd448a2b", + "s_1_1101.stats:md5,9b597e3388d59ef1f61aba30ac90ea79" + ], + [ + "s_1_1101.bcl:md5,8a1e84b79cf3f80794c20e3a0cc84688", + "s_1_1101.stats:md5,9561f7b6ef4b1849afc72b2bb49792bd" + ], + [ + "s_1_1101.bcl:md5,75c00111051f3fa95d04286823cb9109", + "s_1_1101.stats:md5,1fe786cdf8181767deafbd60b3c76610" + ], + [ + "s_1_1101.bcl:md5,529255d8deee0873ed5565e6d1a2ebda", + "s_1_1101.stats:md5,3fa7f467e97a75880f32d17b7429d316" + ], + [ + "s_1_1101.bcl:md5,ea4d960e3d9355d2149da71b88a21df4", + "s_1_1101.stats:md5,2540fe65586e8e800c1ddd8cddd1e8cd" + ], + [ + "s_1_1101.bcl:md5,0dfe1fd92a2dce2f23119aa483429744", + "s_1_1101.stats:md5,78257b2169fb9f0cf40966e06e847e86" + ], + [ + "s_1_1101.bcl:md5,f692ddc9aa3ab849271d07c666d0b3b9", + "s_1_1101.stats:md5,aa2ec6a3e3a9c116e34fe74a21e6459e" + ], + [ + "s_1_1101.bcl:md5,29cc4c239eae7c871c9a1adf92ebdb98", + "s_1_1101.stats:md5,263184813090acd740a5bf25304aed3a" + ], + [ + "s_1_1101.bcl:md5,e005af6a84925e326afbfe264241f047", + "s_1_1101.stats:md5,b6fb20868eebaffcc19daa694a449795" + ], + [ + "s_1_1101.bcl:md5,02f1a699b1ba9967accccf99a7af3d24", + "s_1_1101.stats:md5,4f007efacecaf26dc0e0231aede28754" + ], + [ + "s_1_1101.bcl:md5,df308c72a2dcc655cd95e98f5457187a", + "s_1_1101.stats:md5,130c4b07f4c14030bab012824cbe34da" + ], + [ + 
"s_1_1101.bcl:md5,f3ce10d8d2406b72355023bfa8c96822", + "s_1_1101.stats:md5,2638f4db393ed5b699ec2ce59ff0ec19" + ], + [ + "s_1_1101.bcl:md5,cc2f6d675ad1593ff96f734b172d249e", + "s_1_1101.stats:md5,f5b13f1e1ababc9e1a7a73b0b993cbf1" + ], + [ + "s_1_1101.bcl:md5,7938a0b21448305a951b023b1845b3a7", + "s_1_1101.stats:md5,fcd57511adabfc3ba1ac045165330006" + ], + [ + "s_1_1101.bcl:md5,44879bc6a38df1fee8def61868115041", + "s_1_1101.stats:md5,517e20e4b58a8023a37f9af62e0e2036" + ], + [ + "s_1_1101.bcl:md5,8749611e62406a7d2f34c610a55e56af", + "s_1_1101.stats:md5,8ccf24b3676ef84f2e513be8f2a9f3d1" + ], + [ + "s_1_1101.bcl:md5,a9846a037611cda3721958088f714c0e", + "s_1_1101.stats:md5,6438fa5a1892f328cab1605a95d80a3b" + ], + [ + "s_1_1101.bcl:md5,d6c4a2a726496476eb826532f974ed5f", + "s_1_1101.stats:md5,8c2c65b5e8b00dbf61ada65252aeb266" + ], + [ + "s_1_1101.bcl:md5,be3dde6cae7dd85855a6bf295ebfacfe", + "s_1_1101.stats:md5,93bc13f3b0749b2b8d8bcb0b1199f4f0" + ], + [ + "s_1_1101.bcl:md5,7c64514735a6cf1565b60647edd17d20", + "s_1_1101.stats:md5,4a0aa6c49b24f876415e5878cef7f805" + ], + [ + "s_1_1101.bcl:md5,3983b4043bc9df4b505202a5134ccf03", + "s_1_1101.stats:md5,1c9d9a8558adc1279ca27c96bc1b9758" + ], + [ + "s_1_1101.bcl:md5,a0b8d77f116ec95975f9253dcb768136", + "s_1_1101.stats:md5,c3992b786756e7ec42f65ef4b13b50d4" + ], + [ + "s_1_1101.bcl:md5,43c95ba35d06bb7c57fbd16f3d1cfd6c", + "s_1_1101.stats:md5,3cb69d04698c39f97f962e5bf1eea7f0" + ], + [ + "s_1_1101.bcl:md5,3dbeea0cad7052f19f53ff6f19dd4d90", + "s_1_1101.stats:md5,58bbc8254f0f5f4a244531e8e9c12a04" + ], + [ + "s_1_1101.bcl:md5,da56d088996376c898d855b6cd0a7dfc", + "s_1_1101.stats:md5,9f2d78af6908ce1576b89cdc059844ff" + ], + [ + "s_1_1101.bcl:md5,7b641a5565f095e9a6ffcad9e4305033", + "s_1_1101.stats:md5,3ada06c59b4fb41b83ab6abd0979e9fc" + ], + [ + "s_1_1101.bcl:md5,a3843d397a01d51657825bb652c191e5", + "s_1_1101.stats:md5,19341e52a4bfc7d9d48e9d2acc68c519" + ], + [ + "s_1_1101.bcl:md5,048e3ebfc8efeb8012def6b741c9060d", + 
"s_1_1101.stats:md5,88bd38deca1e87d700effab1fd099565" + ], + [ + "s_1_1101.bcl:md5,b340db0e07e829dd5da22371916a1a9e", + "s_1_1101.stats:md5,e44cfaddcc4ffb968e5b1a2f41ac48a5" + ], + [ + "s_1_1101.bcl:md5,e6011ec6eabbc2b8792deb283c621ce0", + "s_1_1101.stats:md5,090875dcd1a431af24bc631333f089c4" + ], + [ + "s_1_1101.bcl:md5,a08f216e3352345031ed100ec4245082", + "s_1_1101.stats:md5,97b949ef4b96219e1369f673cf5f8a6c" + ], + [ + "s_1_1101.bcl:md5,b43337c76fb037dfcf5f8f7bcb3618e5", + "s_1_1101.stats:md5,ddef585805e79951f69d23ab7354f69b" + ], + [ + "s_1_1101.bcl:md5,8c61fd004104397b360855e058bbf1bf", + "s_1_1101.stats:md5,0f8d253816d594dcfea3ccf48c826401" + ], + [ + "s_1_1101.bcl:md5,594d06310d328b188aa0b3edfff22cb2", + "s_1_1101.stats:md5,3160bf271b39aeb7590e4fd2984710ba" + ], + [ + "s_1_1101.bcl:md5,4c9eada67c9d55437211d83e111961d5", + "s_1_1101.stats:md5,2901b46ab16ec4863d30e4c84ec29c97" + ], + [ + "s_1_1101.bcl:md5,e03971ae5282f0accc0c1b7374d9ef1b", + "s_1_1101.stats:md5,60d2a19ce59bf70a21a28555484cead8" + ], + [ + "s_1_1101.bcl:md5,e1c6f7a06e63d149895d3e48e63df155", + "s_1_1101.stats:md5,44beb10af847ea3dddaf06dda7031126" + ], + [ + "s_1_1101.bcl:md5,960a99bf29a8f9d936e9b8582d46c9c6", + "s_1_1101.stats:md5,544cd1a7aaaa841914b40ece43399334" + ], + [ + "s_1_1101.bcl:md5,5706679f349fd4a6b6313bc2c41c7a42", + "s_1_1101.stats:md5,627eea844b26dae033848c2f9f69177b" + ], + [ + "s_1_1101.bcl:md5,21da5abc4b0402bbac14b5ab998b0b4f", + "s_1_1101.stats:md5,515bd140b095ad90473ca7a9a69877ab" + ], + "s_1_1101.control:md5,08a72e2198ae95150718e8adf011d105", + "s_1_1101.filter:md5,3a72bc73b323c8cb0ac5bfeb62d98989" + ] + ], + [ + "s_1_1101.locs:md5,0827ea802e5257cc5b20e757a33d4c98" + ], + "RTAConfiguration.xml:md5,c7d6e257bc374f142dc64b9d2281d4c9", + "config.xml:md5,9a4cc7ec01fefa2f1ce9bcb45bbad6e9" + ] + ], + [ + "ControlMetricsOut.bin:md5,6d77b38d0793a6e1ce1e85706e488953", + "CorrectedIntMetricsOut.bin:md5,2bbf84d3be72734addaa2fe794711434", + 
"ErrorMetricsOut.bin:md5,38c88def138e9bb832539911affdb286", + "ExtractionMetricsOut.bin:md5,7497c3178837eea8f09350b5cd252e99", + "IndexMetricsOut.bin:md5,d41d8cd98f00b204e9800998ecf8427e", + "QMetricsOut.bin:md5,7e9f198d53ebdfbb699a5f94cf1ed51c", + "TileMetricsOut.bin:md5,83891751ec1c91a425a524b476b6ca3c" + ], + "RunInfo.xml:md5,03038959f4dd181c86bc97ae71fe270a" + ] + ] + ] + ], + "timestamp": "2023-10-18T11:56:39.562418" + }, + "test_untar_onlyfiles": { + "content": [ + [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ] + ], + "timestamp": "2023-10-18T11:56:46.878844" + }, + "test_untar": { + "content": [ + [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ] + ], + "timestamp": "2023-10-18T11:56:08.16574" + } +} \ No newline at end of file diff --git a/modules/nf-core/untar/tests/tags.yml b/modules/nf-core/untar/tests/tags.yml new file mode 100644 index 0000000..feb6f15 --- /dev/null +++ b/modules/nf-core/untar/tests/tags.yml @@ -0,0 +1,2 @@ +untar: + - modules/nf-core/untar/** diff --git a/modules/nf-core/vcftools/environment.yml b/modules/nf-core/vcftools/environment.yml index 875817e..503449e 100644 --- a/modules/nf-core/vcftools/environment.yml +++ b/modules/nf-core/vcftools/environment.yml @@ -1,3 +1,4 @@ +name: vcftools channels: - conda-forge - bioconda diff --git a/modules/nf-core/vcftools/main.nf b/modules/nf-core/vcftools/main.nf index b36c429..0e61955 100644 --- a/modules/nf-core/vcftools/main.nf +++ b/modules/nf-core/vcftools/main.nf @@ -94,7 +94,6 @@ process VCFTOOLS { (args.contains('--hapcount')) ? "--hapcount ${bed}" : (args.contains('--positions')) ? "--positions ${bed}" : (args.contains('--exclude-positions')) ? 
"--exclude-positions ${bed}" : '' - args_list.removeIf { it.contains('--bed') } args_list.removeIf { it.contains('--exclude-bed') } args_list.removeIf { it.contains('--hapcount') } diff --git a/modules/nf-core/vcftools/vcftools.diff b/modules/nf-core/vcftools/vcftools.diff deleted file mode 100644 index fd2e9ec..0000000 --- a/modules/nf-core/vcftools/vcftools.diff +++ /dev/null @@ -1,33 +0,0 @@ -Changes in module 'nf-core/vcftools' ---- modules/nf-core/vcftools/meta.yml -+++ modules/nf-core/vcftools/meta.yml -@@ -1,6 +1,7 @@ - name: vcftools - description: A set of tools written in Perl and C++ for working with VCF files - keywords: -+ - VCFtools - - VCF - - sort - tools: - ---- modules/nf-core/vcftools/main.nf -+++ modules/nf-core/vcftools/main.nf -@@ -91,10 +91,15 @@ - - def bed_arg = (args.contains('--bed')) ? "--bed ${bed}" : - (args.contains('--exclude-bed')) ? "--exclude-bed ${bed}" : -- (args.contains('--hapcount')) ? "--hapcount ${bed}" : '' -+ (args.contains('--hapcount')) ? "--hapcount ${bed}" : -+ (args.contains('--positions')) ? "--positions ${bed}" : -+ (args.contains('--exclude-positions')) ? "--exclude-positions ${bed}" : '' -+ - args_list.removeIf { it.contains('--bed') } - args_list.removeIf { it.contains('--exclude-bed') } - args_list.removeIf { it.contains('--hapcount') } -+ args_list.removeIf { it.contains('--positions') } -+ args_list.removeIf { it.contains('--exclude-positions') } - - def diff_variant_arg = (args.contains('--diff')) ? "--diff ${diff_variant_file}" : - (args.contains('--gzdiff')) ? 
"--gzdiff ${diff_variant_file}" : - -************************************************************ diff --git a/nextflow.config b/nextflow.config index 90da5d8..bb1f0f6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,10 +13,12 @@ params { input = null fasta = null fai = null + align = false interval = null include_positions = null exclude_positions = null split_fasta_cutoff = 100000 + vector_db = "${projectDir}/assets/vectorDB.tar.gz" // Boilerplate options outdir = 'results' @@ -166,8 +168,10 @@ profiles { executor.cpus = 16 executor.memory = 60.GB } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } + test { includeConfig 'conf/test.config' } + test_align { includeConfig 'conf/test_align.config' } + test_full { includeConfig 'conf/test_full.config' } + test_full_align { includeConfig 'conf/test_full_align.config' } } diff --git a/nextflow_schema.json b/nextflow_schema.json index e035219..c857174 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -28,6 +28,12 @@ "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open" }, + "vector_db": { + "type": "string", + "default": "${projectDir}/assets/vectorDB.tar.gz", + "description": "Path to directory or tar.gz archive for pre-built PacBio vector database.", + "format": "file-path" + }, "email": { "type": "string", "description": "Email address for completion summary.", @@ -51,6 +57,10 @@ "type": "string", "description": "Path to the index file of the FASTA genome file, either fai or gzi." }, + "align": { + "type": "boolean", + "description": "Align the input reads to the reference" + }, "interval": { "type": "string", "description": "Interval bed file." 
diff --git a/subworkflows/local/align_pacbio.nf b/subworkflows/local/align_pacbio.nf new file mode 100644 index 0000000..75f4ac2 --- /dev/null +++ b/subworkflows/local/align_pacbio.nf @@ -0,0 +1,61 @@ +// +// Align PacBio read files against the genome +// + +include { FILTER_PACBIO } from '../../subworkflows/local/filter_pacbio' +include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main' +include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main' +include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' +include { CONVERT_STATS } from '../../subworkflows/local/convert_stats' + + +workflow ALIGN_PACBIO { + take: + fasta // channel: [ val(meta), /path/to/fasta ] + reads // channel: [ val(meta), /path/to/datafile ] + db // channel: /path/to/vector_db + + + main: + ch_versions = Channel.empty() + + + // Filter BAM and output as FASTQ + FILTER_PACBIO ( reads, db ) + ch_versions = ch_versions.mix ( FILTER_PACBIO.out.versions ) + + + // Align Fastq to Genome + MINIMAP2_ALIGN ( FILTER_PACBIO.out.fastq, fasta, true, false, false ) + ch_versions = ch_versions.mix ( MINIMAP2_ALIGN.out.versions.first() ) + + + // Collect all alignment output by sample name + MINIMAP2_ALIGN.out.bam + | map { meta, bam -> [['id': meta.sample, 'datatype': meta.datatype, 'sample': meta.sample ], bam] } + | groupTuple ( by: [0] ) + | set { ch_bams } + + + // Merge + SAMTOOLS_MERGE ( ch_bams, [ [], [] ], [ [], [] ] ) + ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions.first() ) + + + // Convert merged BAM to CRAM and calculate indices and statistics + SAMTOOLS_MERGE.out.bam + | map { meta, bam -> [ meta, bam, [] ] } + | set { ch_sort } + + CONVERT_STATS ( ch_sort, fasta ) + ch_versions = ch_versions.mix ( CONVERT_STATS.out.versions ) + + + emit: + cram = CONVERT_STATS.out.cram // channel: [ val(meta), /path/to/cram ] + crai = CONVERT_STATS.out.crai // channel: [ val(meta), /path/to/crai ] + stats = CONVERT_STATS.out.stats // channel: [ 
val(meta), /path/to/stats ] + idxstats = CONVERT_STATS.out.idxstats // channel: [ val(meta), /path/to/idxstats ] + flagstat = CONVERT_STATS.out.flagstat // channel: [ val(meta), /path/to/flagstat ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/convert_stats.nf b/subworkflows/local/convert_stats.nf new file mode 100644 index 0000000..9118e8d --- /dev/null +++ b/subworkflows/local/convert_stats.nf @@ -0,0 +1,53 @@ +// +// Convert BAM to CRAM, create index and calculate statistics +// + +include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_STATS } from '../../modules/nf-core/samtools/stats/main' +include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main' +include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/samtools/idxstats/main' + + +workflow CONVERT_STATS { + take: + bam // channel: [ val(meta), /path/to/bam, /path/to/bai] + fasta // channel: [ val(meta), /path/to/fasta ] + + + main: + ch_versions = Channel.empty() + + // Convert BAM to CRAM + SAMTOOLS_VIEW ( bam, fasta, [ ] ) + ch_versions = ch_versions.mix ( SAMTOOLS_VIEW.out.versions.first() ) + + + // Combine CRAM and CRAI into one channel + SAMTOOLS_VIEW.out.cram + | join ( SAMTOOLS_VIEW.out.crai ) + | set { ch_cram_crai } + + + // Calculate statistics + SAMTOOLS_STATS ( ch_cram_crai, fasta ) + ch_versions = ch_versions.mix ( SAMTOOLS_STATS.out.versions.first() ) + + + // Calculate statistics based on flag values + SAMTOOLS_FLAGSTAT ( ch_cram_crai ) + ch_versions = ch_versions.mix ( SAMTOOLS_FLAGSTAT.out.versions.first() ) + + + // Calculate index statistics + SAMTOOLS_IDXSTATS ( ch_cram_crai ) + ch_versions = ch_versions.mix ( SAMTOOLS_IDXSTATS.out.versions.first() ) + + + emit: + cram = SAMTOOLS_VIEW.out.cram // channel: [ val(meta), /path/to/cram ] + crai = SAMTOOLS_VIEW.out.crai // channel: [ val(meta), /path/to/crai ] + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), /path/to/stats ] + 
flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), /path/to/flagstat ] + idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), /path/to/idxstats ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/deepvariant_caller.nf b/subworkflows/local/deepvariant_caller.nf index 10b5359..7e4adf0 100644 --- a/subworkflows/local/deepvariant_caller.nf +++ b/subworkflows/local/deepvariant_caller.nf @@ -14,7 +14,11 @@ workflow DEEPVARIANT_CALLER { ch_versions = Channel.empty() reads_fasta.map { meta, cram, crai, interval, fasta_file_name, fasta, fai -> - [ [ id: meta.id + "_" + fasta_file_name, sample: meta.id, type: meta.type ], + [ [ id: meta.id + "_" + fasta_file_name, + sample: meta.id, + type: meta.datatype, + fasta_file_name: fasta_file_name + ], cram, crai, interval @@ -23,14 +27,14 @@ workflow DEEPVARIANT_CALLER { // fasta fasta = reads_fasta.map { meta, cram, crai, interval, fasta_file_name, fasta, fai -> - [ [ id: meta.id + "_" + fasta_file_name, sample: meta.id, type: meta.type ], + [ [ id: meta.id + "_" + fasta_file_name, sample: meta.id, type: meta.datatype ], fasta ] } // fai fai = reads_fasta.map{ meta, cram, crai, interval, fasta_file_name, fasta, fai -> - [ [ id: meta.id + "_" + fasta_file_name, sample: meta.id, type: meta.type ], + [ [ id: meta.id + "_" + fasta_file_name, sample: meta.id, type: meta.datatype ], fai ] } @@ -44,9 +48,15 @@ workflow DEEPVARIANT_CALLER { // group the vcf files together by sample DEEPVARIANT.out.vcf - .map { meta, vcf -> [ meta.sample, vcf ] } + .map { meta, vcf -> [ + [ id: meta.fasta_file_name.tokenize(".")[0..-2].join(".") + + "." + meta.type + + "."
+ meta.sample + ], + vcf + ] } .groupTuple() - .map { sample, vcf -> [ [id: sample], vcf, [] ] } + .map { meta, vcf -> [ meta, vcf, [] ] } .set { vcf } // catcat vcf files @@ -55,9 +65,15 @@ workflow DEEPVARIANT_CALLER { // group the g vcf files together by sample DEEPVARIANT.out.gvcf - .map { meta, gvcf -> [ meta.sample, gvcf ] } + .map { meta, gvcf -> [ + [ id: meta.fasta_file_name.tokenize(".")[0..-2].join(".") + + "." + meta.type + + "." + meta.sample + ], + gvcf + ] } .groupTuple() - .map { sample, gvcf -> [ [ id: sample ], gvcf, [] ] } + .map { meta, gvcf -> [ meta, gvcf, [] ] } .set { g_vcf } // catcat g vcf files diff --git a/subworkflows/local/filter_pacbio.nf b/subworkflows/local/filter_pacbio.nf new file mode 100644 index 0000000..2e306bf --- /dev/null +++ b/subworkflows/local/filter_pacbio.nf @@ -0,0 +1,78 @@ +// +// Filter PacBio reads +// Original protocol is a modified version by Shane of the original program, HiFiAdapterFilt +// + +include { SAMTOOLS_VIEW as SAMTOOLS_CONVERT } from '../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_COLLATE } from '../../modules/nf-core/samtools/collate/main' +include { SAMTOOLS_FASTA } from '../../modules/nf-core/samtools/fasta/main' +include { GUNZIP } from '../../modules/nf-core/gunzip/main' +include { BLAST_BLASTN } from '../../modules/nf-core/blast/blastn/main' +include { PACBIO_FILTER } from '../../modules/local/pacbio_filter' +include { SAMTOOLS_VIEW as SAMTOOLS_FILTER } from '../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_FASTQ } from '../../modules/nf-core/samtools/fastq/main' + + +workflow FILTER_PACBIO { + take: + reads // channel: [ val(meta), /path/to/datafile ] + db // channel: /path/to/vector_db + + + main: + ch_versions = Channel.empty() + + + // Convert from PacBio BAM to Samtools BAM + reads + | map { meta, bam -> [ meta, bam, [] ] } + | set { ch_pacbio } + + SAMTOOLS_CONVERT (ch_pacbio, [ [], [] ], [] ) + ch_versions = ch_versions.mix ( 
SAMTOOLS_CONVERT.out.versions.first() ) + + + // Collate BAM file to create interleaved FASTA + SAMTOOLS_COLLATE ( SAMTOOLS_CONVERT.out.bam, [] ) + ch_versions = ch_versions.mix ( SAMTOOLS_COLLATE.out.versions.first() ) + + + // Convert BAM to FASTA + SAMTOOLS_FASTA ( SAMTOOLS_COLLATE.out.bam, true ) + ch_versions = ch_versions.mix ( SAMTOOLS_FASTA.out.versions.first() ) + + + // Gunzip FASTA file to BLAST + GUNZIP ( SAMTOOLS_FASTA.out.other ) + ch_versions = ch_versions.mix ( GUNZIP.out.versions.first() ) + + + // Nucleotide BLAST + db.map{db -> [ [], db]}.set{ch_db} + BLAST_BLASTN ( GUNZIP.out.gunzip, ch_db ) + ch_versions = ch_versions.mix ( BLAST_BLASTN.out.versions.first() ) + + + // Filter BLAST output + PACBIO_FILTER ( BLAST_BLASTN.out.txt ) + ch_versions = ch_versions.mix ( PACBIO_FILTER.out.versions.first() ) + + + // Create filtered BAM file + SAMTOOLS_CONVERT.out.bam + | join ( SAMTOOLS_CONVERT.out.csi ) + | set { ch_reads } + + SAMTOOLS_FILTER ( ch_reads, [ [], [] ], PACBIO_FILTER.out.list ) + ch_versions = ch_versions.mix ( SAMTOOLS_FILTER.out.versions.first() ) + + + // Convert BAM to FASTQ + SAMTOOLS_FASTQ ( SAMTOOLS_FILTER.out.unoutput, true ) + ch_versions = ch_versions.mix ( SAMTOOLS_FASTQ.out.versions.first() ) + + + emit: + fastq = SAMTOOLS_FASTQ.out.other // channel: [ meta, /path/to/fastq ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 7e9f667..b71f3fd 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -12,13 +12,33 @@ workflow INPUT_CHECK { SAMPLESHEET_CHECK ( samplesheet ) .csv .splitCsv ( header:true, sep:',' ) - .map { [ - [ id: it.sample, sample: it.sample, type: it.datatype ], - file(it.datafile) - ] } + .map { create_data_channel( it ) } .set { reads } - + emit: reads // channel: [ val(meta), data ] versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] } + +// Function to get list of 
[ meta, reads ] +def create_data_channel ( LinkedHashMap row ) { + // create meta map + def meta = [:] + meta.id = row.sample + meta.sample = row.sample.split('_')[0..-2].join('_') + meta.datatype = row.datatype + + if ( meta.datatype == "pacbio" ) { + platform = "PACBIO" + } + meta.read_group = "\'@RG\\tID:" + row.datafile.split('/')[-1].split('\\.')[0..-2].join('.') + "\\tPL:" + platform + "\\tSM:" + meta.sample + "\'" + + // add path(s) of the read file(s) to the meta map + def data_meta = [] + if ( !file(row.datafile).exists() ) { + exit 1, "ERROR: Please check input samplesheet -> Data file does not exist!\n${row.datafile}" + } else { + data_meta = [ meta, file(row.datafile) ] + } + return data_meta +} diff --git a/subworkflows/local/input_merge.nf b/subworkflows/local/input_merge.nf index 90bb82f..12f2653 100644 --- a/subworkflows/local/input_merge.nf +++ b/subworkflows/local/input_merge.nf @@ -7,8 +7,8 @@ include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort' workflow INPUT_MERGE { take: - fasta // file: /path/to/genome.fasta or /path/to/genome.fasta.gz - fai // file: /path/to/genome.*.fai or /path/to/genome.fasta.gz.gzi + fasta // channel: [ val(meta), /path/to/genome.fasta or /path/to/genome.fasta.gz ] + fai // channel: [ val(meta), /path/to/genome.*.fai or /path/to/genome.fasta.gz.gzi ] reads // channel: [ val(meta), data ] main: @@ -37,20 +37,17 @@ workflow INPUT_MERGE { .map { sample, meta_list -> [sample, meta_list[0]] } .join( grouped_reads ) .map { sample, meta, bam_cram_list -> [ - [ id: ( bam_cram_list.size() == 1 ) ? 
sample : sample + '_combined', - type: meta.type + [ id: sample, + datatype: meta.datatype ], bam_cram_list ]} .set { grouped_reads_with_meta } // call samtool merge - ch_fasta = fasta.map { fasta -> [ [ 'id': fasta.baseName ], fasta ] }.first() - ch_fai = fai.map { fai -> [ [ 'id': fai.baseName ], fai ] }.first() - SAMTOOLS_MERGE( grouped_reads_with_meta, - ch_fasta, - ch_fai + fasta, + fai ) ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions ) diff --git a/workflows/variantcalling.nf b/workflows/variantcalling.nf index c88480f..f3e7450 100644 --- a/workflows/variantcalling.nf +++ b/workflows/variantcalling.nf @@ -44,12 +44,6 @@ if ( (params.include_positions) && (params.exclude_positions) ){ ch_positions = [] } -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT LOCAL MODULES/SUBWORKFLOWS @@ -60,6 +54,7 @@ if ( (params.include_positions) && (params.exclude_positions) ){ // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { ALIGN_PACBIO } from '../subworkflows/local/align_pacbio' include { INPUT_MERGE } from '../subworkflows/local/input_merge' include { INPUT_FILTER_SPLIT } from '../subworkflows/local/input_filter_split' include { DEEPVARIANT_CALLER } from '../subworkflows/local/deepvariant_caller' @@ -75,7 +70,8 @@ include { PROCESS_VCF } from '../subworkflows/local/process_vcf' // MODULE: Installed directly from nf-core/modules // include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' -include { SAMTOOLS_FAIDX } from '../modules/nf-core/samtools/faidx/main' +include { SAMTOOLS_FAIDX } from '../modules/nf-core/samtools/faidx/main' +include { UNTAR } from 
'../modules/nf-core/untar/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -88,35 +84,30 @@ include { SAMTOOLS_FAIDX } from '../modules/nf-core/samtools/faidx/main' workflow VARIANTCALLING { ch_versions = Channel.empty() + ch_fasta + .map { fasta -> [ [ 'id': fasta.baseName - '.fasta' - '.fa' ], fasta ] } + .first() + .set { ch_genome } // // check reference fasta index given or not // if( params.fai == null ){ - ch_fasta - .map { fasta -> [ [ 'id': fasta.baseName ], fasta ] } - .set { ch_genome } - SAMTOOLS_FAIDX ( ch_genome, [[], []] ) ch_versions = ch_versions.mix( SAMTOOLS_FAIDX.out.versions ) - SAMTOOLS_FAIDX.out.fai - .map{ mata, fai -> fai } - .set{ ch_fai } - - SAMTOOLS_FAIDX.out.gzi - .map{ meta, gzi -> gzi } - .set{ ch_gzi } - if( params.fasta.endsWith('.gz') ){ - ch_index = ch_gzi + ch_genome_index = SAMTOOLS_FAIDX.out.gzi }else{ - ch_index = ch_fai + ch_genome_index = SAMTOOLS_FAIDX.out.fai } }else{ - ch_index = ch_fai + ch_index + .map { fai -> [ [ 'id': fai.baseName ], fai ] } + .first() + .set { ch_genome_index } } // @@ -127,28 +118,65 @@ workflow VARIANTCALLING { ) ch_versions = ch_versions.mix( INPUT_CHECK.out.versions ) + // - // SUBWORKFLOW: merge the input reads by sample name + // SUBWORKFLOW: align reads if required // - INPUT_MERGE ( - ch_fasta, - ch_index, - INPUT_CHECK.out.reads, - ) - ch_versions = ch_versions.mix( INPUT_MERGE.out.versions ) + if( params.align ){ + + if ( params.vector_db.endsWith( '.tar.gz' ) ) { + + UNTAR ( [ [:], params.vector_db ] ).untar + | map { meta, file -> file } + | set { ch_vector_db } + ch_versions = ch_versions.mix ( UNTAR.out.versions ) + } else { + + Channel.fromPath ( params.vector_db ) + | set { ch_vector_db } + + } + + ALIGN_PACBIO ( + ch_genome, + INPUT_CHECK.out.reads, + ch_vector_db + ) + ch_versions = ch_versions.mix( ALIGN_PACBIO.out.versions ) + + ALIGN_PACBIO.out.cram + .join( ALIGN_PACBIO.out.crai ) + .set{ ch_aligned_reads } + + } else { + + 
// + // SUBWORKFLOW: merge the input reads by sample name + // + INPUT_MERGE ( + ch_genome, + ch_genome_index, + INPUT_CHECK.out.reads, + ) + ch_versions = ch_versions.mix( INPUT_MERGE.out.versions ) + ch_aligned_reads = INPUT_MERGE.out.indexed_merged_reads + + } + // // SUBWORKFLOW: split the input fasta file and filter input reads // INPUT_FILTER_SPLIT ( ch_fasta, - INPUT_MERGE.out.indexed_merged_reads, + ch_aligned_reads, ch_interval, split_fasta_cutoff ) ch_versions = ch_versions.mix( INPUT_FILTER_SPLIT.out.versions ) + // // SUBWORKFLOW: call deepvariant // @@ -157,6 +185,7 @@ workflow VARIANTCALLING { ) ch_versions = ch_versions.mix( DEEPVARIANT_CALLER.out.versions ) + // // convert VCF channel meta id // @@ -170,6 +199,7 @@ workflow VARIANTCALLING { PROCESS_VCF( vcf, ch_positions ) ch_versions = ch_versions.mix( PROCESS_VCF.out.versions ) + // // MODULE: Combine different version together //