diff --git a/modules.json b/modules.json index a57849ee..47c20082 100644 --- a/modules.json +++ b/modules.json @@ -7,17 +7,17 @@ "pfr": { "bwa/index": { "branch": "main", - "git_sha": "4b9da80b1e4c16067babd97554bea42d7cd9ca85", + "git_sha": "6410ddc6dfcee5ed480f47199cbd527662cdf5fe", "installed_by": ["fastq_bwa_mem_samblaster"] }, "bwa/mem": { "branch": "main", - "git_sha": "835879b8f174bb4d2c5534d0381ffbe62cc1b060", + "git_sha": "6410ddc6dfcee5ed480f47199cbd527662cdf5fe", "installed_by": ["fastq_bwa_mem_samblaster"] }, "cat/cat": { "branch": "main", - "git_sha": "4b9da80b1e4c16067babd97554bea42d7cd9ca85", + "git_sha": "6410ddc6dfcee5ed480f47199cbd527662cdf5fe", "installed_by": ["fasta_ltrretriever_lai"] }, "custom/checkgff3fastacorrespondence": { @@ -57,7 +57,7 @@ }, "ltrharvest": { "branch": "main", - "git_sha": "835879b8f174bb4d2c5534d0381ffbe62cc1b060", + "git_sha": "6410ddc6dfcee5ed480f47199cbd527662cdf5fe", "installed_by": ["fasta_ltrretriever_lai"] }, "ltrretriever/lai": { @@ -72,7 +72,7 @@ }, "samblaster": { "branch": "main", - "git_sha": "73358a6712178b9a67c39f92e65e8144b5880eae", + "git_sha": "6410ddc6dfcee5ed480f47199cbd527662cdf5fe", "installed_by": ["fastq_bwa_mem_samblaster"] } } @@ -81,7 +81,7 @@ "pfr": { "fasta_ltrretriever_lai": { "branch": "main", - "git_sha": "154661d7c1769532ff7b5f11259644ec200dd47d", + "git_sha": "60ee50c79f8f868fe0b2e48cba543ad1d8fa89b2", "installed_by": ["subworkflows"] }, "fastq_bwa_mem_samblaster": { diff --git a/modules/pfr/bwa/index/tests/main.nf.test b/modules/pfr/bwa/index/tests/main.nf.test index 2f33c0e8..af33e73c 100644 --- a/modules/pfr/bwa/index/tests/main.nf.test +++ b/modules/pfr/bwa/index/tests/main.nf.test @@ -15,7 +15,7 @@ nextflow_process { """ input[0] = [ [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } diff --git 
a/modules/pfr/bwa/mem/tests/main.nf.test b/modules/pfr/bwa/mem/tests/main.nf.test index cd6591ff..2696e4bf 100644 --- a/modules/pfr/bwa/mem/tests/main.nf.test +++ b/modules/pfr/bwa/mem/tests/main.nf.test @@ -18,7 +18,7 @@ nextflow_process { """ input[0] = [ [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -31,7 +31,7 @@ nextflow_process { input[0] = [ [ id:'test', single_end:true ], // meta map [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] ] input[1] = BWA_INDEX.out.index @@ -58,7 +58,7 @@ nextflow_process { """ input[0] = [ [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -71,7 +71,7 @@ nextflow_process { input[0] = [ [ id:'test', single_end:true ], // meta map [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] ] input[1] = BWA_INDEX.out.index @@ -98,7 +98,7 @@ nextflow_process { """ input[0] = [ [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -111,8 +111,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ] input[1] = BWA_INDEX.out.index @@ -139,7 +139,7 @@ nextflow_process { """ input[0] = [ [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -152,8 +152,8 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ] input[1] = BWA_INDEX.out.index diff --git a/modules/pfr/cat/cat/tests/main.nf.test b/modules/pfr/cat/cat/tests/main.nf.test index aaae04f9..fcee2d19 100644 --- a/modules/pfr/cat/cat/tests/main.nf.test +++ b/modules/pfr/cat/cat/tests/main.nf.test @@ -19,8 +19,8 @@ nextflow_process { [ [ id:'genome', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) ] ] """ @@ -45,8 +45,8 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + file(params.modules_testdata_base_path 
+ 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) ] ] """ @@ -72,8 +72,8 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) ] ] """ @@ -83,7 +83,8 @@ nextflow_process { def lines = path(process.out.file_out.get(0).get(1)).linesGzip assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")} ) } } @@ -101,8 +102,8 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) ] ] """ @@ -130,8 +131,8 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) ] ] """ @@ -141,7 +142,8 @@ 
nextflow_process { def lines = path(process.out.file_out.get(0).get(1)).linesGzip assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_unzipped_zipped_size")} ) } } @@ -158,7 +160,7 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] """ @@ -174,4 +176,3 @@ nextflow_process { } } } - diff --git a/modules/pfr/cat/cat/tests/main.nf.test.snap b/modules/pfr/cat/cat/tests/main.nf.test.snap index 0c9bfe8d..423571ba 100644 --- a/modules/pfr/cat/cat/tests/main.nf.test.snap +++ b/modules/pfr/cat/cat/tests/main.nf.test.snap @@ -1,4 +1,10 @@ { + "test_cat_unzipped_zipped_size": { + "content": [ + 375 + ], + "timestamp": "2023-10-16T14:33:08.049445686" + }, "test_cat_unzipped_unzipped": { "content": [ { @@ -61,36 +67,31 @@ ], "timestamp": "2023-10-16T14:32:49.642741302" }, - "test_cat_zipped_zipped": { + "test_cat_zipped_zipped_lines": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": true - }, - "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" - ] - ], - "1": [ - "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" - ], - "file_out": [ - [ - { - "id": "test", - "single_end": true - }, - "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" - ] - ], - "versions": [ - "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" - ] - } + [ + "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", + "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", + "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal 
frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", + "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ] + ], + "timestamp": "2023-10-16T14:32:33.629048645" + }, + "test_cat_unzipped_zipped_lines": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ] ], - "timestamp": "2024-01-12T14:02:02.999254641" + "timestamp": "2023-10-16T14:33:08.038830506" }, "test_cat_one_file_unzipped_zipped_lines": { "content": [ @@ -105,41 +106,16 @@ ], "timestamp": "2023-10-16T14:33:21.39642399" }, - "test_cat_unzipped_zipped": { + "test_cat_zipped_zipped_size": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": true - }, - "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" - ] - ], - "1": [ - "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" - ], - "file_out": [ - [ - { - "id": "test", - "single_end": true - }, - "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" - ] - ], - "versions": [ - "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" - ] - } + 78 ], - "timestamp": 
"2024-01-12T14:08:26.948048418" + "timestamp": "2023-10-16T14:32:33.641869244" }, "test_cat_one_file_unzipped_zipped_size": { "content": [ 374 ], - "timestamp": "2024-01-12T14:10:22.445700266" + "timestamp": "2023-10-16T14:33:21.4094373" } -} +} \ No newline at end of file diff --git a/modules/pfr/ltrharvest/meta.yml b/modules/pfr/ltrharvest/meta.yml index efda0670..256b3ce5 100644 --- a/modules/pfr/ltrharvest/meta.yml +++ b/modules/pfr/ltrharvest/meta.yml @@ -12,13 +12,12 @@ keywords: - transposons - retrotransposons tools: - - "edta": - description: Extensive de-novo TE Annotator (EDTA) - homepage: "https://github.com/oushujun/EDTA" - documentation: "https://github.com/oushujun/EDTA" - tool_dev_url: "https://github.com/oushujun/EDTA" - doi: "10.1186/s13059-019-1905-y" - licence: ["GPL v3"] + - "LTR_HARVEST_parallel": + description: A Perl wrapper for LTR_harvest + homepage: "https://github.com/oushujun/EDTA/tree/v2.2.0/bin/LTR_HARVEST_parallel" + documentation: "https://github.com/oushujun/EDTA/tree/v2.2.0/bin/LTR_HARVEST_parallel" + tool_dev_url: "https://github.com/oushujun/EDTA/tree/v2.2.0/bin/LTR_HARVEST_parallel" + licence: ["MIT"] - "gt": description: "The GenomeTools genome analysis system" homepage: "https://genometools.org/index.html" diff --git a/modules/pfr/samblaster/environment.yml b/modules/pfr/samblaster/environment.yml index a6e3038a..ac838241 100644 --- a/modules/pfr/samblaster/environment.yml +++ b/modules/pfr/samblaster/environment.yml @@ -1,9 +1,11 @@ name: samblaster + channels: - conda-forge - bioconda - defaults + dependencies: + - bioconda::htslib=1.19.1 - bioconda::samblaster=0.1.26 - bioconda::samtools=1.19.2 - - bioconda::htslib=1.19.1 diff --git a/modules/pfr/samblaster/meta.yml b/modules/pfr/samblaster/meta.yml index ccb48320..5c1e5a97 100644 --- a/modules/pfr/samblaster/meta.yml +++ b/modules/pfr/samblaster/meta.yml @@ -51,3 +51,4 @@ authors: - "@lescai" maintainers: - "@lescai" + - "@gallvp" diff --git 
a/modules/pfr/samblaster/tests/main.nf.test b/modules/pfr/samblaster/tests/main.nf.test new file mode 100644 index 00000000..01794307 --- /dev/null +++ b/modules/pfr/samblaster/tests/main.nf.test @@ -0,0 +1,57 @@ +nextflow_process { + + name "Test Process SAMBLASTER" + script "../main.nf" + process "SAMBLASTER" + + tag "modules" + tag "modules_nfcore" + tag "samblaster" + + test("homo_sapiens-test_paired_end_umi_unsorted_bam") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_umi_unsorted_bam'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_umi_unsorted_bam'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/pfr/samblaster/tests/main.nf.test.snap b/modules/pfr/samblaster/tests/main.nf.test.snap new file mode 100644 index 00000000..917c8f1f --- /dev/null +++ b/modules/pfr/samblaster/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,8a70467f2dfc2e0d8e81787223d2fc77" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,8a70467f2dfc2e0d8e81787223d2fc77" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-26T14:04:42.510824" + }, + "homo_sapiens-test_paired_end_umi_unsorted_bam": { + "content": [ + { + "0": [ + [ + 
{ + "id": "test", + "single_end": false + }, + "test.bam:md5,634a6bd541478e970f0a4c279f399889" + ] + ], + "1": [ + "versions.yml:md5,8a70467f2dfc2e0d8e81787223d2fc77" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,634a6bd541478e970f0a4c279f399889" + ] + ], + "versions": [ + "versions.yml:md5,8a70467f2dfc2e0d8e81787223d2fc77" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-26T14:04:38.118875" + } +} \ No newline at end of file diff --git a/modules/pfr/samblaster/tests/nextflow.config b/modules/pfr/samblaster/tests/nextflow.config new file mode 100644 index 00000000..605e74eb --- /dev/null +++ b/modules/pfr/samblaster/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: SAMBLASTER { + ext.args = '-M --addMateTags' + ext.prefix = { "${meta.id}.processed" } + } +} diff --git a/modules/pfr/samblaster/tests/tags.yml b/modules/pfr/samblaster/tests/tags.yml new file mode 100644 index 00000000..3882ee54 --- /dev/null +++ b/modules/pfr/samblaster/tests/tags.yml @@ -0,0 +1,2 @@ +samblaster: + - "modules/pfr/samblaster/**" diff --git a/nextflow.config b/nextflow.config index c15d4da8..795a3f41 100644 --- a/nextflow.config +++ b/nextflow.config @@ -145,11 +145,10 @@ profiles { shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false - // docker.runOptions = '-u $(id -u):$(id -g)' DNADIFF fails + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - // docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' DNADIFF fails - docker.runOptions = '--platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true diff --git a/subworkflows/pfr/fasta_ltrretriever_lai/main.nf b/subworkflows/pfr/fasta_ltrretriever_lai/main.nf index 566e6821..ea5183b1 100644 --- a/subworkflows/pfr/fasta_ltrretriever_lai/main.nf +++ b/subworkflows/pfr/fasta_ltrretriever_lai/main.nf @@ -9,50 +9,59 @@ include {
CUSTOM_RESTOREGFFIDS } from '../../../modules/pfr/custom/restoreg workflow FASTA_LTRRETRIEVER_LAI { take: - ch_fasta // channel: [ val(meta), fasta ] - ch_monoploid_seqs // channel: [ val(meta), txt ]; Optional: Set to [] if not needed - skip_lai // val; true|false + ch_fasta // channel: [ val(meta), fasta ] + ch_monoploid_seqs // channel: [ val(meta), txt ]; Optional: Set to [] if not needed + // val(meta) from ch_fasta and ch_monoploid_seqs are only required + // to have the same `id` + skip_lai // val(true|false) main: - - ch_versions = Channel.empty() + ch_versions = Channel.empty() - // MOUDLE: CUSTOM_SHORTENFASTAIDS + // MODULE: CUSTOM_SHORTENFASTAIDS CUSTOM_SHORTENFASTAIDS ( ch_fasta ) - ch_short_ids_fasta = ch_fasta - | join(CUSTOM_SHORTENFASTAIDS.out.short_ids_fasta, by:0, remainder:true) - | map { meta, fasta, short_ids_fasta -> - [ meta, short_ids_fasta ?: fasta ] - } - - ch_short_ids_tsv = CUSTOM_SHORTENFASTAIDS.out.short_ids_tsv - ch_versions = ch_versions.mix(CUSTOM_SHORTENFASTAIDS.out.versions.first()) + ch_short_ids_fasta = ch_fasta + | join(CUSTOM_SHORTENFASTAIDS.out.short_ids_fasta, by:0, remainder:true) + | map { meta, fasta, short_ids_fasta -> + [ meta, short_ids_fasta ?: fasta ] + } + + ch_short_ids_tsv = CUSTOM_SHORTENFASTAIDS.out.short_ids_tsv + ch_short_monoploid_seqs = ch_short_ids_tsv + | join( + ch_monoploid_seqs ?: Channel.empty() + ) + | map { meta, short_ids_tsv, monoploid_seqs -> + map_monoploid_seqs_to_new_ids(meta, short_ids_tsv, monoploid_seqs) + } + ch_versions = ch_versions.mix(CUSTOM_SHORTENFASTAIDS.out.versions.first()) // MODULE: LTRHARVEST LTRHARVEST ( ch_short_ids_fasta ) - ch_ltrharvest_scn = LTRHARVEST.out.scn - ch_versions = ch_versions.mix(LTRHARVEST.out.versions.first()) + ch_ltrharvest_scn = LTRHARVEST.out.scn + ch_versions = ch_versions.mix(LTRHARVEST.out.versions.first()) // MODULE: LTRFINDER LTRFINDER ( ch_short_ids_fasta ) - ch_ltrfinder_scn = LTRFINDER.out.scn - ch_versions = ch_versions.mix(LTRFINDER.out.versions.first()) + ch_ltrfinder_scn =
LTRFINDER.out.scn + ch_versions = ch_versions.mix(LTRFINDER.out.versions.first()) // MODULE: CAT_CAT - ch_cat_cat_inputs = ch_ltrharvest_scn - | join(ch_ltrfinder_scn) - | map { meta, harvested, found -> [ meta, [ harvested, found ] ] } + ch_cat_cat_inputs = ch_ltrharvest_scn + | join(ch_ltrfinder_scn) + | map { meta, harvested, found -> [ meta, [ harvested, found ] ] } CAT_CAT ( ch_cat_cat_inputs ) - ch_ltr_candidates = CAT_CAT.out.file_out - ch_versions = ch_versions.mix(CAT_CAT.out.versions.first()) + ch_ltr_candidates = CAT_CAT.out.file_out + ch_versions = ch_versions.mix(CAT_CAT.out.versions.first()) // MODULE: LTRRETRIEVER_LTRRETRIEVER - ch_ltrretriever_inputs = ch_short_ids_fasta.join(ch_ltr_candidates) + ch_ltrretriever_inputs = ch_short_ids_fasta.join(ch_ltr_candidates) + LTRRETRIEVER_LTRRETRIEVER ( ch_ltrretriever_inputs.map { meta, fasta, ltr -> [ meta, fasta ] }, ch_ltrretriever_inputs.map { meta, fasta, ltr -> ltr }, @@ -61,26 +70,30 @@ workflow FASTA_LTRRETRIEVER_LAI { [] ) - ch_pass_list = LTRRETRIEVER_LTRRETRIEVER.out.pass_list - ch_ltrlib = LTRRETRIEVER_LTRRETRIEVER.out.ltrlib - ch_annotation_out = LTRRETRIEVER_LTRRETRIEVER.out.annotation_out - ch_annotation_gff = LTRRETRIEVER_LTRRETRIEVER.out.annotation_gff - ch_versions = ch_versions.mix(LTRRETRIEVER_LTRRETRIEVER.out.versions.first()) + ch_pass_list = LTRRETRIEVER_LTRRETRIEVER.out.pass_list + ch_ltrlib = LTRRETRIEVER_LTRRETRIEVER.out.ltrlib + ch_annotation_out = LTRRETRIEVER_LTRRETRIEVER.out.annotation_out + ch_annotation_gff = LTRRETRIEVER_LTRRETRIEVER.out.annotation_gff + ch_versions = ch_versions.mix(LTRRETRIEVER_LTRRETRIEVER.out.versions.first()) // MODULE: LAI - ch_lai_inputs = skip_lai - ? Channel.empty() - : ch_short_ids_fasta - | join(ch_pass_list) - | join(ch_annotation_out) - | join( - ch_monoploid_seqs ?: Channel.empty(), - by:0, - remainder: true - ) - | map { meta, fasta, pass, out, mono -> - [ meta, fasta, pass, out, mono ?: [] ] - } + ch_lai_inputs = skip_lai + ? 
Channel.empty() + : ch_short_ids_fasta + | join(ch_pass_list) + | join(ch_annotation_out) + | map { meta, fasta, pass, out -> + [ meta.id, meta, fasta, pass, out ] + } + | join( + ch_short_monoploid_seqs + | map { meta, mono -> [ meta.id, mono ] }, + by:0, + remainder: true + ) + | map { id, meta, fasta, pass, out, mono -> + [ meta, fasta, pass, out, mono ?: [] ] + } LTRRETRIEVER_LAI( ch_lai_inputs.map { meta, fasta, pass, out, mono -> [ meta, fasta ] }, ch_lai_inputs.map { meta, fasta, pass, out, mono -> pass }, @@ -88,27 +101,56 @@ workflow FASTA_LTRRETRIEVER_LAI { ch_lai_inputs.map { meta, fasta, pass, out, mono -> mono } ) - ch_lai_log = LTRRETRIEVER_LAI.out.log - ch_lai_out = LTRRETRIEVER_LAI.out.lai_out - ch_versions = ch_versions.mix(LTRRETRIEVER_LAI.out.versions.first()) + ch_lai_log = LTRRETRIEVER_LAI.out.log + ch_lai_out = LTRRETRIEVER_LAI.out.lai_out + ch_versions = ch_versions.mix(LTRRETRIEVER_LAI.out.versions.first()) // MODULE: CUSTOM_RESTOREGFFIDS - ch_restorable_gff_tsv = ch_annotation_gff.join(ch_short_ids_tsv) + ch_restorable_gff_tsv = ch_annotation_gff.join(ch_short_ids_tsv) CUSTOM_RESTOREGFFIDS ( ch_restorable_gff_tsv.map { meta, gff, tsv -> [ meta, gff ] }, ch_restorable_gff_tsv.map { meta, gff, tsv -> tsv } ) - ch_restored_gff = ch_annotation_gff - | join(CUSTOM_RESTOREGFFIDS.out.restored_ids_gff3, by:0, remainder:true) - | map { meta, gff, restored_gff -> [ meta, restored_gff ?: gff ] } - ch_versions = ch_versions.mix(CUSTOM_RESTOREGFFIDS.out.versions.first()) + ch_restored_gff = ch_annotation_gff + | join(CUSTOM_RESTOREGFFIDS.out.restored_ids_gff3, by:0, remainder:true) + | map { meta, gff, restored_gff -> [ meta, restored_gff ?: gff ] } + + ch_versions = ch_versions.mix(CUSTOM_RESTOREGFFIDS.out.versions.first()) emit: - ltrlib = ch_ltrlib // channel: [ val(meta), fasta ] - annotation_gff = ch_restored_gff // channel: [ val(meta), gff ] - lai_log = ch_lai_log // channel: [ val(meta), log ] - lai_out = ch_lai_out // channel: [ val(meta), out 
] - versions = ch_versions // channel: [ versions.yml ] + ltrlib = ch_ltrlib // channel: [ val(meta), fasta ] + annotation_gff = ch_restored_gff // channel: [ val(meta), gff ] + lai_log = ch_lai_log // channel: [ val(meta), log ] + lai_out = ch_lai_out // channel: [ val(meta), out ] + versions = ch_versions // channel: [ versions.yml ] +} + +
+def map_monoploid_seqs_to_new_ids(meta, short_ids_tsv, monoploid_seqs) {
+    // Rewrites the IDs listed in monoploid_seqs (one per line) to the shortened IDs recorded in short_ids_tsv; returns [ meta, path ]
+    def short_ids_head = short_ids_tsv.text.split('\n')[0]
+
+    // This first-line sentinel means no ID was renamed, so the original list is passed through untouched
+    if (short_ids_head == "IDs have acceptable length and character. No change required.") {
+        return [ meta, monoploid_seqs ]
+    }
+
+    def orig_to_new_ids = [:] // original ID -> shortened ID, read from tab-separated pairs
+    short_ids_tsv.text.eachLine { line ->
+        def (original_id, renamed_id) = line.split('\t')
+        orig_to_new_ids[original_id] = renamed_id
+    }
+
+    def mapped_lines = monoploid_seqs.text.readLines().collect { it.trim() }.findAll { it }.collect { original_id ->
+        if (!orig_to_new_ids[original_id]) {
+            error "Failed to find $original_id in ${monoploid_seqs}. " +
+            "The monoploid_seqs file is malformed!"
+        }
+        orig_to_new_ids[original_id] + '\n' // newline-terminate so that several IDs do not run together
+    }
+    def output_file = new File("${meta.id}.mapped.monoploid.seqs.txt") // NOTE(review): relative to the JVM working directory - confirm intended
+    output_file.text = mapped_lines.join('') // assignment replaces content left behind by an earlier run
+    return [ meta, output_file.toPath() ]
 }
diff --git a/subworkflows/pfr/fasta_ltrretriever_lai/tests/main.nf.test b/subworkflows/pfr/fasta_ltrretriever_lai/tests/main.nf.test index 773c514a..f2c401d8 100644 --- a/subworkflows/pfr/fasta_ltrretriever_lai/tests/main.nf.test +++ b/subworkflows/pfr/fasta_ltrretriever_lai/tests/main.nf.test @@ -18,7 +18,7 @@ nextflow_workflow { tag "ltrretriever/lai" tag "custom/restoregffids" - test("actinidia_chinensis-genome_21_fasta_gz-success") { + test("actinidia_chinensis-genome_21_fasta_gz") { setup { run("GUNZIP") { @@ -38,9 +38,9 @@ nextflow_workflow { when { workflow { """ - input[0] = GUNZIP.out.gunzip - input[1] = [] - input[2] = false + input[0] = GUNZIP.out.gunzip + input[1] = [] + input[2] = false """ } } @@ -56,4 +56,103 @@ nextflow_workflow { ) } } + + test("actinidia_chinensis-genome_21_fasta_gz-with_mono") { + + setup { + run("GUNZIP") { + script "../../../../modules/nf-core/gunzip" + + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true) + ] + """ + } + } + } + + when { + workflow { + """ + GUNZIP.out.gunzip.map { meta, fasta -> + def fa = new File('test.fa') + fa.write(fasta.text.replaceAll('>chr1', '>chr_xxxxxxxxxxxxxxx_1')) + + [ meta, fa.toPath() ] + } + | set { ch_fa } + + def monoploid_seqs = new File('test.mono.seq.txt') + monoploid_seqs.write("chr_xxxxxxxxxxxxxxx_1") + + input[0] = ch_fa + input[1] = Channel.of( [ [ id:'test' ], monoploid_seqs.toPath() ] ) + input[2] = false + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert file(workflow.out.annotation_gff[0][1]).text.contains('Copia_LTR_retrotransposon') }, + { assert file(workflow.out.lai_log[0][1]).text.contains('Calculate LAI:') }, + { assert file(workflow.out.lai_log[0][1]).text.contains('Done!') }, + { assert
Math.abs(Float.parseFloat(path(workflow.out.lai_out[0][1]).text.split("\n")[1].split("\t")[6]) - 31.29) <= 1.0 }, + { assert file(workflow.out.ltrlib[0][1]).text.contains('#LTR/Copia') } + ) + } + } + + test("actinidia_chinensis-genome_21_fasta_gz-without_lai") { + + setup { + run("GUNZIP") { + script "../../../../modules/nf-core/gunzip" + + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true) + ] + """ + } + } + } + + when { + workflow { + """ + GUNZIP.out.gunzip.map { meta, fasta -> + def fa = new File('test.fa') + fa.write(fasta.text.replaceAll('>chr1', '>chr_xxxxxxxxxxxxxxx_1')) + + [ meta, fa.toPath() ] + } + | set { ch_fa } + + def monoploid_seqs = new File('test.mono.seq.txt') + monoploid_seqs.write("chr_xxxxxxxxxxxxxxx_1") + + input[0] = ch_fa + input[1] = Channel.of( [ [ id:'test' ], monoploid_seqs.toPath() ] ) + input[2] = true + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert file(workflow.out.annotation_gff[0][1]).text.contains('Copia_LTR_retrotransposon') }, + { assert file(workflow.out.ltrlib[0][1]).text.contains('#LTR/Copia') }, + { assert workflow.out.lai_log == [] }, + { assert workflow.out.lai_out == [] } + ) + } + } }