pre-commit run --all-files

oist · Oct 3, 2024 · bb4b99e · bb4b99e
1 parent 2778d59
commit bb4b99e
Show file tree

Hide file tree

Showing 4 changed files with 35 additions and 67 deletions.
diff --git a/README.md b/README.md
@@ -1,5 +1,4 @@
-Genome pre-processing pipeline
-==============================
+# Genome pre-processing pipeline
 
 This is a local pipeline to pre-process downloaded genomes before feeding them
 to <https://github.com/nf-core/pairgenomealign>.
@@ -8,21 +7,22 @@ to <https://github.com/nf-core/pairgenomealign>.
 
 This pipeline takes genomes as inputs and soft-masks their repeats with the following software:
 
- - tantan (our default choice from a long time because TRF used to be non-free).
- - windowmasker
- - repeatmasker
+- tantan (our default choice for a long time because TRF used to be non-free).
+- windowmasker
+- repeatmasker
 
 The input of repeatmasker can be any of:
- - repeatmodeller (default)
- - DFAM
- - a custom repeat library.
 
-Repeatmasker and repeatmodeller are run from the same image as the standard _nf-core_ module.  But it is possible to pass the URL to an alternative singularity image, for instance to use the latest [TE Tools container](https://github.com/Dfam-consortium/TETools?tab=readme-ov-file#dfam-te-tools-container)
+- repeatmodeller (default)
+- DFAM
+- a custom repeat library.
+
+Repeatmasker and repeatmodeller are run from the same image as the standard _nf-core_ module. But it is possible to pass the URL to an alternative singularity image, for instance to use the latest [TE Tools container](https://github.com/Dfam-consortium/TETools?tab=readme-ov-file#dfam-te-tools-container).
 
 ## Disclaimer
 
 This is not an official pipeline. This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) initiative, and reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE).
- 
+
 > The nf-core framework for community-curated bioinformatics pipelines.
 >
 > Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.
@@ -73,30 +73,30 @@ nextflow run ./main.nf \
 
 ## Options
 
- - Point `--repeatlib` to a FASTA file to have an extra RepeatMasker run using it as a library.
- - Set `--taxon` to a taxon name to have an extra RepeatMasker run using the `-species` option set to that taxon.
- - Point `--singularity_image` to a local file path like `/flash/LuscombeU/singularity.cacheDir/tetools_1.88.5.sif` or an URL to singularity image to replace the default one.
- - Set the `--gzipped_input=false` parameter when the input is not compressed..
+- Point `--repeatlib` to a FASTA file to have an extra RepeatMasker run using it as a library.
+- Set `--taxon` to a taxon name to have an extra RepeatMasker run using the `-species` option set to that taxon.
+- Point `--singularity_image` to a local file path like `/flash/LuscombeU/singularity.cacheDir/tetools_1.88.5.sif` or a URL to a singularity image to replace the default one.
+- Set the `--gzipped_input=false` parameter when the input is not compressed.
 
 ## Pipeline output
 
 ### `tantan`, `repeatmodeler`, `windowmasker`, `dfam` (optional), `extlib` (optional)
 
- - Masked genome file (compressed).
- - BED file representing the masked regions.
- - Summary statistics of the softmasked genome.
+- Masked genome file (compressed).
+- BED file representing the masked regions.
+- Summary statistics of the softmasked genome.
 
 ## Resource usage
 
 On a test run on haplotype-merged and diploid assemblies of _Oikopleura dioica_ (2n = 60 Mbp):
 
- - CPU usage was ~50 % for most processes.  RepeatModeller was allocated 24 cores and used ~10 on average.
- - Memory usage was less than 1 GB for all processes except RepeatModeller (~6 GB, max 8 GB).
- - All processes needed only 10 % of the allocated time, except for RepeatModeller, which took between 100 and 500 minutes.
+- CPU usage was ~50 % for most processes. RepeatModeller was allocated 24 cores and used ~10 on average.
+- Memory usage was less than 1 GB for all processes except RepeatModeller (~6 GB, max 8 GB).
+- All processes needed only 10 % of the allocated time, except for RepeatModeller, which took between 100 and 500 minutes.
 
 ## Future directions
 
- - It may be interesting to add TRF and ULTRA, and compare and combine their results to the ones of tantan.
+- It may be interesting to add TRF and ULTRA, and to compare and combine their results with those of tantan.
 
 ## Credits
 

diff --git a/modules.json b/modules.json
@@ -8,58 +8,42 @@
                     "fastqc": {
                         "branch": "master",
                         "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "gfastats": {
                         "branch": "master",
                         "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "gunzip": {
                         "branch": "master",
                         "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "multiqc": {
                         "branch": "master",
                         "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "repeatmodeler/builddatabase": {
                         "branch": "master",
                         "git_sha": "1cbb5551b917aa423e414dcd69898d01520a309d",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "repeatmodeler/repeatmodeler": {
                         "branch": "master",
                         "git_sha": "84efd2f87d07deb22ee9378f065a9aa5f1434161",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "windowmasker/mkcounts": {
                         "branch": "master",
                         "git_sha": "32cac29d4a92220965dace68a1fb0bb2e3547cac",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     },
                     "windowmasker/ustat": {
                         "branch": "master",
                         "git_sha": "32cac29d4a92220965dace68a1fb0bb2e3547cac",
-                        "installed_by": [
-                            "modules"
-                        ]
+                        "installed_by": ["modules"]
                     }
                 }
             },
@@ -68,23 +52,17 @@
                     "utils_nextflow_pipeline": {
                         "branch": "master",
                         "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
-                        "installed_by": [
-                            "subworkflows"
-                        ]
+                        "installed_by": ["subworkflows"]
                     },
                     "utils_nfcore_pipeline": {
                         "branch": "master",
                         "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3",
-                        "installed_by": [
-                            "subworkflows"
-                        ]
+                        "installed_by": ["subworkflows"]
                     },
                     "utils_nfvalidation_plugin": {
                         "branch": "master",
                         "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
-                        "installed_by": [
-                            "subworkflows"
-                        ]
+                        "installed_by": ["subworkflows"]
                     }
                 }
             }

diff --git a/modules/local/mergemasks.nf b/modules/local/mergemasks.nf
@@ -26,7 +26,7 @@ process MERGE_MASKS {
     bedtools jaccard -nonamecheck -a $tantan       -b $windowmasker         > ${prefix}_tantan_windowmasker_jaccard.txt
     bedtools jaccard -nonamecheck -a $tantan       -b $repeatmasker         > ${prefix}_tantan_repeatmasker_jaccard.txt
     bedtools jaccard -nonamecheck -a $repeatmasker -b $windowmasker         > ${prefix}_repeatmasker_windowmasker_jaccard.txt
-    
+
     zcat $tantan $windowmasker               | sort -k1,1 -k2,2n | bedtools merge | gzip --best  > ${prefix}_tantan_windowmasker.bed.gz
     zcat $tantan $repeatmasker               | sort -k1,1 -k2,2n | bedtools merge | gzip --best  > ${prefix}_tantan_repeatmasker.bed.gz
     zcat $windowmasker $repeatmasker         | sort -k1,1 -k2,2n | bedtools merge | gzip --best  > ${prefix}_windowmasker_repeatmasker.bed.gz

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -10,10 +10,7 @@
             "type": "object",
             "fa_icon": "fas fa-terminal",
             "description": "Define where the pipeline should find input data and save output data.",
-            "required": [
-                "input",
-                "outdir"
-            ],
+            "required": ["input", "outdir"],
             "properties": {
                 "input": {
                     "type": "string",
@@ -184,14 +181,7 @@
                     "description": "Method used to save pipeline results to output directory.",
                     "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
                     "fa_icon": "fas fa-copy",
-                    "enum": [
-                        "symlink",
-                        "rellink",
-                        "link",
-                        "copy",
-                        "copyNoFollow",
-                        "move"
-                    ],
+                    "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
                     "hidden": true
                 },
                 "email_on_fail": {