diff --git a/CHANGELOG.md b/CHANGELOG.md index eb68c18b..320a7a78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 1. Created summary presence/absence tables for NCBI FCS modules [#88](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/88) 2. Added min. system requirements [#91](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/91) 3. Updated modules: `UNTAR`, `MERYL_COUNT`, `GUNZIP`, `MINIMAP2_ALIGN` +4. Added a test to verify the fix for the bug which resulted in a pipeline crash for assemblies without LTRs +5. Locally patched `FASTA_LTRRETRIEVER_LAI` sub-workflow to emit the `LTRRETRIEVER_LTRRETRIEVER` log as one of the outputs +6. Locally patched `LTRRETRIEVER_LTRRETRIEVER` module to not fail when `LTR_retriever` exits with an error ### `Fixed` @@ -18,7 +21,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 3. Updated NCBI FCS GX to 0.5.4 [#93](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/93) 4. Now NCBI FCS GX module uses all the cores available from the Nextflow task 5. Fixed a bug which caused `PLOTSR` to fail for certain assembly names [#102](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/102) -6. Changed default branch name from `master` to `main` in nf-core template files +6. Now `LTRRETRIEVER_LTRRETRIEVER` does not crash when the input assembly does not contain any LTRs [#92](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/92) +7. Now `LTRRETRIEVER_LTRRETRIEVER` does not crash when the input assembly is not writable [#98](https://github.com/Plant-Food-Research-Open/assemblyqc/issues/98) ### `Dependencies` @@ -27,6 +31,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Deprecated` +1. 
Changed default branch name from `master` to `main` in nf-core template files + ## v2.0.0 - [04-June-2024] ### `Added` diff --git a/bin/report_modules/parsers/lai_parser.py b/bin/report_modules/parsers/lai_parser.py index 7318e92a..eac49a85 100644 --- a/bin/report_modules/parsers/lai_parser.py +++ b/bin/report_modules/parsers/lai_parser.py @@ -92,4 +92,28 @@ def parse_lai_folder(folder_name="lai_outputs"): } data["LAI"].append(stats) + list_of_ltrretriever_log_files = lai_folder_path.glob("*.log") + + for file in list_of_ltrretriever_log_files: + if str(file).endswith("LAI.log"): + continue + + p = re.compile("ERROR: (.*)") + match_results = p.findall(file.read_text()) + if len(match_results) < 1: + continue + + file_tokens = re.findall( + r"([\w]+).log", + os.path.basename(str(file)), + ) + + hap_name = file_tokens[0] + + stats = { + "hap": hap_name, + "result": f"LTR_retriever Error: {match_results[0]}".strip(), + } + data["LAI"].append(stats) + return {"LAI": sort_list_of_results(data["LAI"], "hap")} diff --git a/bin/report_modules/templates/lai/lai.html b/bin/report_modules/templates/lai/lai.html index 50a98582..34c5ccd8 100644 --- a/bin/report_modules/templates/lai/lai.html +++ b/bin/report_modules/templates/lai/lai.html @@ -14,7 +14,9 @@ Nucleic Acids Research, Volume 46, Issue 21, 30 November 2018, Page e126, 10.1093/nar/gky730

+ {% if 'LTRRETRIEVER_LAI' in all_stats_dicts['VERSIONS'] %}

Version: {{ all_stats_dicts['VERSIONS']['LTRRETRIEVER_LAI']['lai'] }}

+ {% endif %} {% include 'lai/dropdown.html' %} {% include 'lai/summary_contents.html' %} diff --git a/modules/pfr/ltrretriever/ltrretriever/main.nf b/modules/pfr/ltrretriever/ltrretriever/main.nf index f4577920..dc7bb7e3 100644 --- a/modules/pfr/ltrretriever/ltrretriever/main.nf +++ b/modules/pfr/ltrretriever/ltrretriever/main.nf @@ -16,9 +16,9 @@ process LTRRETRIEVER_LTRRETRIEVER { output: tuple val(meta), path("*.log") , emit: log - tuple val(meta), path("${prefix}.pass.list"), emit: pass_list - tuple val(meta), path("*.pass.list.gff3") , emit: pass_list_gff - tuple val(meta), path("*.LTRlib.fa") , emit: ltrlib + tuple val(meta), path("${prefix}.pass.list"), emit: pass_list , optional: true + tuple val(meta), path("*.pass.list.gff3") , emit: pass_list_gff , optional: true + tuple val(meta), path("*.LTRlib.fa") , emit: ltrlib , optional: true tuple val(meta), path("${prefix}.out") , emit: annotation_out , optional: true tuple val(meta), path("*.out.gff3") , emit: annotation_gff , optional: true path "versions.yml" , emit: versions @@ -33,22 +33,32 @@ process LTRRETRIEVER_LTRRETRIEVER { def infinder = finder ? "-infinder $finder" : '' def inmgescan = mgescan ? "-inmgescan $mgescan" : '' def non_tgca_file = non_tgca ? 
"-nonTGCA $non_tgca" : '' + def writable_genome = "${genome.baseName}.writable.${genome.extension}" """ + cp \\ + $genome \\ + $writable_genome + + chmod \\ + a+w \\ + $writable_genome + LTR_retriever \\ - -genome $genome \\ + -genome $writable_genome \\ $inharvest \\ $infinder \\ $inmgescan \\ $non_tgca_file \\ -threads $task.cpus \\ $args \\ - &> >(tee "${prefix}.log" 2>&1) + &> >(tee "${prefix}.log" 2>&1) \\ + || echo "Errors from LTR_retriever printed to ${prefix}.log" - mv "${genome}.pass.list" "${prefix}.pass.list" - mv "${genome}.pass.list.gff3" "${prefix}.pass.list.gff3" - mv "${genome}.LTRlib.fa" "${prefix}.LTRlib.fa" - mv "${genome}.out" "${prefix}.out" || echo ".out was not produced" - mv "${genome}.out.gff3" "${prefix}.out.gff3" || echo ".out.gff3 was not produced" + mv "${writable_genome}.pass.list" "${prefix}.pass.list" || echo ".pass.list was not produced" + mv "${writable_genome}.pass.list.gff3" "${prefix}.pass.list.gff3" || echo ".pass.list.gff3 was not produced" + mv "${writable_genome}.LTRlib.fa" "${prefix}.LTRlib.fa" || echo ".LTRlib.fa was not produced" + mv "${writable_genome}.out" "${prefix}.out" || echo ".out was not produced" + mv "${writable_genome}.out.gff3" "${prefix}.out.gff3" || echo ".out.gff3 was not produced" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/pfr/ltrretriever/ltrretriever/tests/main.nf.test b/modules/pfr/ltrretriever/ltrretriever/tests/main.nf.test index e8e13a60..5f67ff16 100644 --- a/modules/pfr/ltrretriever/ltrretriever/tests/main.nf.test +++ b/modules/pfr/ltrretriever/ltrretriever/tests/main.nf.test @@ -10,45 +10,99 @@ nextflow_process { tag "ltrretriever" tag "ltrretriever/ltrretriever" tag "gunzip/main" - tag "gt/ltrharvest" - tag "gt/suffixerator" + tag "ltrharvest" tag "ltrfinder" tag "cat/cat" - test("actinidia_chinensis-genome_21_fasta_gz-success") { + test("sarscov2-genome-no_ltr") { setup { - run('GUNZIP') { - script "../../../gunzip/main" + run("LTRHARVEST") { + script 
"../../../ltrharvest" process { """ input[0] = [ - [ id:'test' ], // meta map - file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true) + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } } - run("GT_SUFFIXERATOR") { - script "../../../gt/suffixerator" + run("LTRFINDER") { + script "../../../ltrfinder" process { """ - input[0] = GUNZIP.out.gunzip - input[1] = 'dna' + input[0] = [ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + run("CAT_CAT") { + script "../../../cat/cat" + + process { + """ + input[0] = LTRHARVEST.out.scn.mix(LTRFINDER.out.scn).groupTuple() + """ + } + } + } + + when { + process { + """ + input[0] = input[0] = [ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = CAT_CAT.out.file_out.map { meta, tabout -> tabout } + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.log[0][1]).text.contains("ERROR: No candidate is found in the file(s) you specified.") }, + { assert snapshot(process.out.versions).match("versions_no_ltr") } + ) + } + + } + + test("actinidia_chinensis-genome_21_fasta_gz-success") { + + setup { + + run('GUNZIP') { + script "../../../gunzip/main" + + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz', checkIfExists: true) + ] """ } } - run("GT_LTRHARVEST") { - script "../../../gt/ltrharvest" + run("LTRHARVEST") { + script "../../../ltrharvest" process { """ - input[0] = GT_SUFFIXERATOR.out.index + input[0] = GUNZIP.out.gunzip """ } } @@ -68,7 +122,7 @@ nextflow_process { process { """ - input[0] = 
GT_LTRHARVEST.out.tabout.mix(LTRFINDER.out.scn).groupTuple() + input[0] = LTRHARVEST.out.scn.mix(LTRFINDER.out.scn).groupTuple() """ } } @@ -90,7 +144,7 @@ nextflow_process { assertAll( { assert process.success }, { assert path(process.out.log[0][1]).text.contains("####### Result files #########") }, - { assert snapshot(process.out.pass_list).match("pass_list") }, + { assert path(process.out.pass_list[0][1]).text.contains("Copia\tLTR") }, { assert path(process.out.pass_list_gff[0][1]).text.contains("chr1\tLTR_retriever\ttarget_site_duplication") }, { assert path(process.out.ltrlib[0][1]).text.contains("LTR#LTR/Copia") }, { assert snapshot(process.out.annotation_out).match("annotation_out") }, @@ -110,7 +164,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz', checkIfExists: true) ] input[1] = [] input[2] = [] @@ -123,11 +177,10 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot(path(process.out.versions[0]).text).match("versions_stub") } + { assert snapshot(process.out).match() } ) } } -} \ No newline at end of file +} diff --git a/modules/pfr/ltrretriever/ltrretriever/tests/main.nf.test.snap b/modules/pfr/ltrretriever/ltrretriever/tests/main.nf.test.snap index bcf98638..825b1e59 100644 --- a/modules/pfr/ltrretriever/ltrretriever/tests/main.nf.test.snap +++ b/modules/pfr/ltrretriever/ltrretriever/tests/main.nf.test.snap @@ -1,31 +1,4 @@ { - "versions_stub": { - "content": [ - "\"LTRRETRIEVER_LTRRETRIEVER\":\n LTR_retriever: v2.9.9\n" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-19T11:04:16.007262" - }, - "pass_list": { - "content": [ - [ - [ - { - "id": "test" - }, - 
"test.pass.list:md5,0c96ee3b48691e65da2235786a926160" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-19T11:17:50.087449" - }, "versions": { "content": [ "\"LTRRETRIEVER_LTRRETRIEVER\":\n LTR_retriever: v2.9.9\n" @@ -156,14 +129,26 @@ { "id": "test" }, - "test.out:md5,4ecf9226cbd7a3aaf7cf5cfa575fcc6a" + "test.out:md5,33d89bea9031f25de8f0d3591ab94d87" ] ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-16T14:18:02.458476" + }, + "versions_no_ltr": { + "content": [ + [ + "versions.yml:md5,3ab159acaee06b342b56e2d35e5e669b" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" }, - "timestamp": "2024-02-19T11:17:50.150622" + "timestamp": "2024-07-16T14:03:52.324194" } -} \ No newline at end of file +} diff --git a/modules/pfr/ltrretriever/ltrretriever/tests/nextflow.config b/modules/pfr/ltrretriever/ltrretriever/tests/nextflow.config index 11499594..7f675565 100644 --- a/modules/pfr/ltrretriever/ltrretriever/tests/nextflow.config +++ b/modules/pfr/ltrretriever/ltrretriever/tests/nextflow.config @@ -1,18 +1,12 @@ process { - withName: GT_SUFFIXERATOR { - ext.args = '-suf -lcp' - // GT_LTRHARVEST requires -suf, -lcp - } - withName: LTRFINDER { ext.args = '-harvest_out' - // LTRRETRIEVER requires -harvest_out + // LTRRETRIEVER_LTRRETRIEVER requires -harvest_out } - withName: GT_LTRHARVEST { - ext.args = '-minlenltr 100 -maxlenltr 7000 -mintsd 4 -maxtsd 6 -motif TGCA -motifmis 1 -similar 85 -vic 10 -seed 20 -seqids yes' - // recommended parameters: https://github.com/oushujun/LTR_retriever#usage + withName: LTRHARVEST { + ext.prefix = { "${meta.id}_ltrharvest" } } withName: CAT_CAT { diff --git a/modules/pfr/ltrretriever/ltrretriever/tests/tags.yml b/modules/pfr/ltrretriever/ltrretriever/tests/tags.yml deleted file mode 100644 index 67241ccb..00000000 --- a/modules/pfr/ltrretriever/ltrretriever/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ 
-ltrretriever/ltrretriever: - "modules/nf-core/ltrretriever/ltrretriever/**" diff --git a/subworkflows/pfr/fasta_ltrretriever_lai/main.nf b/subworkflows/pfr/fasta_ltrretriever_lai/main.nf index 3820e69c..b1d123cb 100644 --- a/subworkflows/pfr/fasta_ltrretriever_lai/main.nf +++ b/subworkflows/pfr/fasta_ltrretriever_lai/main.nf @@ -102,6 +102,7 @@ workflow FASTA_LTRRETRIEVER_LAI { [] ) + ch_ltrretriever_log = LTRRETRIEVER_LTRRETRIEVER.out.log ch_pass_list = LTRRETRIEVER_LTRRETRIEVER.out.pass_list ch_annotation_out = LTRRETRIEVER_LTRRETRIEVER.out.annotation_out ch_pass_out = ch_pass_list.join(ch_annotation_out) @@ -164,11 +165,12 @@ workflow FASTA_LTRRETRIEVER_LAI { ch_versions = ch_versions.mix(CUSTOM_RESTOREGFFIDS.out.versions.first()) emit: - ltrlib = ch_ltrlib // channel: [ val(meta), fasta ] - annotation_gff = ch_restored_gff // channel: [ val(meta), gff ] - lai_log = ch_lai_log // channel: [ val(meta), log ] - lai_out = ch_lai_out // channel: [ val(meta), out ] - versions = ch_versions // channel: [ versions.yml ] + ltrretriever_log = ch_ltrretriever_log // channel: [ val(meta), log ] + ltrlib = ch_ltrlib // channel: [ val(meta), fasta ] + annotation_gff = ch_restored_gff // channel: [ val(meta), gff ] + lai_log = ch_lai_log // channel: [ val(meta), log ] + lai_out = ch_lai_out // channel: [ val(meta), out ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/tests/noltr/assemblysheet.csv b/tests/noltr/assemblysheet.csv new file mode 100644 index 00000000..9769e377 --- /dev/null +++ b/tests/noltr/assemblysheet.csv @@ -0,0 +1,3 @@ +tag,fasta +sarscov2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/genome.fasta +FI1,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.fna.gz diff --git a/tests/noltr/params.json b/tests/noltr/params.json new file mode 100644 index 00000000..a46bd34d --- /dev/null +++ b/tests/noltr/params.json @@ 
-0,0 +1,9 @@ +{ + "config_profile_name": "No LTRs assembly profile", + "config_profile_description": "Profile to test an assembly without LTRs", + "input": "tests/noltr/assemblysheet.csv", + "lai_skip": false, + "max_cpus": 2, + "max_memory": "6.GB", + "max_time": "6.h" +} diff --git a/workflows/assemblyqc.nf b/workflows/assemblyqc.nf index 1fa329ae..2b8bc12e 100644 --- a/workflows/assemblyqc.nf +++ b/workflows/assemblyqc.nf @@ -535,6 +535,10 @@ workflow ASSEMBLYQC { // This partial join can't fail because both outputs are // from the same process | map { meta, log, out -> out ? [ log, out ] : [log] } + | mix( + FASTA_LTRRETRIEVER_LAI.out.ltrretriever_log + | map { meta, log -> log } + ) ch_versions = ch_versions.mix(FASTA_LTRRETRIEVER_LAI.out.versions)