diff --git a/CHANGELOG.md b/CHANGELOG.md index e5e3cb3..7f5f0db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,14 +3,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0dev - [date] +## v1.0.0 "Sweet potato" - [August 27th, 2024] Initial release of nf-core/pairgenomealign, created with the [nf-core](https://nf-co.re/) template. - -### `Added` - -### `Fixed` - -### `Dependencies` - -### `Deprecated` diff --git a/CITATIONS.md b/CITATIONS.md index 55f39e2..1d06505 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -8,6 +8,10 @@ > Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. +## Pipeline design + +> Charles Plessy, Michael J. Mansfield, Aleksandra Bliznina, Aki Masunaga, Charlotte West, Yongkai Tan, Andrew W. Liu, Jan Grašič, María Sara del Río Pisula, Gaspar Sánchez-Serna, Marc Fabrega-Torrus, Alfonso Ferrández-Roldán, Vittoria Roncalli, Pavla Navratilova, Eric M. Thompson, Takeshi Onuma, Hiroki Nishida, Cristian Cañestro, Nicholas M. Luscombe. Extreme genome scrambling in marine planktonic Oikopleura dioica cryptic species. Genome Res. 2024. 34: 426-440; doi: [10.1101/gr.278295.123](https://doi.org/10.1101/gr.278295.123). PubMed ID: [38621828](https://pubmed.ncbi.nlm.nih.gov/38621828/) + ## Pipeline tools - [LAST](https://gitlab.com/mcfrith/last/) diff --git a/README.md b/README.md index 196d4d2..f31d667 100644 --- a/README.md +++ b/README.md @@ -21,14 +21,9 @@ **nf-core/pairgenomealign** is a bioinformatics pipeline that aligns one or more _query_ genomes to a _target_ genome, and plots pairwise representations. 
- +![Tubemap workflow summary](docs/images/pairgenomealign-tubemap.png "Tubemap workflow summary") -The pipeline can generate four kinds of outputs, depending on whether sequences of one genome can match the other genome multiple times or not. - -- _**many-to-many**_ (M2M): Every computed alignments between the _target_ and a _query_ genome. -- _**many-to-one**_ (M2O): Alignments where regions of the _target_ genome are matched at most once by a _query_ genome. -- _**one-to-many**_ (M2O): Alignments where regions of a _query_ genome are matched at most once by the _target_ genome. -- _**one-to-one**_ (O2O) Alignment where regions of the _target_ and _query_ genomes are used at most once. +The pipeline can generate four kinds of outputs, called _many-to-many_, _many-to-one_, _one-to-many_ and _one-to-one_, depending on whether sequences of one genome are allowed to match the other genome multiple times or not. These alignments are output in [MAF](https://genome.ucsc.edu/FAQ/FAQformat.html#format5) format, and optional line plot representations are output in PNG format. @@ -77,7 +72,11 @@ For more details about the output files and reports, please refer to the We thank the following people for their extensive assistance in the development of this pipeline: -- [Mahdi Mohammed](https://github.com/U13bs1125): ported the original pipeline to _nf-core_ template 2.14.x. +- [Mahdi Mohammed](https://github.com/U13bs1125) ported the original pipeline to _nf-core_ template 2.14.x. +- [Martin Frith](https://github.com/mcfrith/), the author of LAST, gave us extensive feedback and advice. +- [Michael Mansfield](https://github.com/mjmansfi) tested the pipeline and provided critical comments. +- [Aleksandra Bliznina](https://github.com/aleksandrabliznina) contributed to the creation of the initial `last/*` modules. +- [Jiashun Miao](https://github.com/miaojiashun) and [Huyen Pham](https://github.com/ngochuyenpham) tested the pipeline on vertebrate genomes. 
## Contributions and Support diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 7293a54..646ff21 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/pairgenomealign + This report has been generated by the nf-core/pairgenomealign analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-pairgenomealign-methods-description": order: -1000 @@ -19,10 +19,39 @@ custom_data: file_format: "tsv" section_name: "Training parameter statistics" plot_type: "table" + headers: + id: + title: "ID" + description: "target___query" + substitution_percent_identity: + title: "Substitution Percent Identity" + "last -t": + title: "Temperature" + description: "Parameter for converting between scores and probability ratios. This affects the column ambiguity estimates. A score is converted to a probability ratio by this formula: exp(score / TEMPERATURE). The default value is 1/lambda, where lambda is the scale factor of the scoring matrix, which is calculated by the method of Yu and Altschul (YK Yu et al. 2003, PNAS 100(26):15688-93)." 
+ "last -a": + title: "Gap existence" + description: "Gap existence cost (lastal -a)" + "last -b": + title: "Gap extension" + description: "Gap extension cost (lastal -b)" + "last -A": + title: "Insertion existence" + description: "Insertion existence cost (lastal -A)" + "last -B": + title: "Insertion extension" + description: "Insertion extension cost (lastal -B)" last_o2o: file_format: "tsv" section_name: "Alignment statistics" plot_type: "table" + headers: + id: + title: "ID" + description: "target__query" + TotalAlignmentLength: + title: "Total alignment length" + PercentSimilarity: + title: "Percent similarity" sp: last_o2o: diff --git a/assets/schema_input.json b/assets/schema_input.json index 1e9ca52..f28d8f4 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -18,7 +18,7 @@ "format": "file-path", "exists": true, "pattern": "^\\S+\\.f(ast|n)?a(\\.gz)?$", - "errorMessage": "Fasta file for genomes must be provided, cannot contain spaces and must have extension '.fa', '.fa.gz', '.fna', '.fna.gz', '.fasta' or '.fasta.gz'" + "errorMessage": "Fasta file for genomes must be provided, cannot contain spaces and must have extension `.fa`, `.fa.gz`, `.fna`, `.fna.gz`, `.fasta` or `.fasta.gz`" } }, "required": ["sample", "fasta"] diff --git a/assets/tube_map.svg b/assets/tube_map.svg deleted file mode 100644 index 34919b9..0000000 --- a/assets/tube_map.svg +++ /dev/null @@ -1,475 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - Targetgenome - - Querygenome(s) - Index - Trainalignmentparameters - Alignquery ontarget - Many-to-manyalignment(s) - Many-to-onealignment(s) - One-to-onealignment(s) - Many-to-manydotplot(s) - Many-to-onedotplot(s) - One-to-onedotplot(s) - One-to-manyalignment(s) - One-to-manydotplot(s) - - - - - - - - - - - - - Pairwise Genome alignment pipeline - - - - - seqtk cutN - assemblyscan - - - diff --git a/conf/modules.config b/conf/modules.config index 6b4599f..3c1b725 100644 --- a/conf/modules.config +++ 
b/conf/modules.config @@ -18,15 +18,15 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: SEQTK_CUTN_TARGET { + withName: CUTN_TARGET { ext.args = { "-n 10" } } - withName: SEQTK_CUTN_QUERY { + withName: CUTN_QUERY { ext.args = { "-n 10" } } - withName: 'LAST_LASTDB' { + withName: 'ALIGNMENT_LASTDB' { // See https://gitlab.com/mcfrith/last/-/blob/main/doc/lastdb.rst for details // -R01: uppercase all sequences and then lowercase simple repeats // -c: soft-mask lowercase letters @@ -34,51 +34,51 @@ process { ext.args = { "-R01 -c -u${params.seed} -S2" } } - withName: 'LAST_SPLIT_O2M' { + withName: 'ALIGNMENT_SPLIT_O2M' { ext.prefix = { "${meta.id}.o2m_aln" } ext.args = { "--reverse -m${params.last_split_mismap}" } } - withName: 'LAST_DOTPLOT_O2M' { + withName: 'ALIGNMENT_DOTPLOT_O2M' { ext.prefix = { "${meta.id}.o2m_plt" } ext.args = { "--rot2=h --sort2=3 --strands2=1 ${params.dotplot_options}" } } - withName: 'LAST_SPLIT_M2O' { + withName: 'ALIGNMENT_SPLIT_M2O' { ext.prefix = { "${meta.id}.m2o_aln" } ext.args = { "-m${params.last_split_mismap}" } } - withName: 'LAST_SPLIT_O2O' { + withName: 'ALIGNMENT_SPLIT_O2O' { ext.prefix = { "${meta.id}.o2o_aln" } ext.args = { "--reverse -m${params.last_split_mismap}" } } - withName: 'LAST_TRAIN' { + withName: 'ALIGNMENT_TRAIN' { ext.args = { "--revsym ${params.lastal_args}" } } - withName: 'LAST_LASTAL_M2O' { + withName: 'ALIGNMENT_LASTAL_M2O' { ext.prefix = { "${meta.id}.m2o_aln" } ext.args = { "--split-f=MAF+ ${params.lastal_args} ${params.lastal_extr_args}" } } - withName: 'LAST_LASTAL_M2M' { + withName: 'ALIGNMENT_LASTAL_M2M' { ext.prefix = { "${meta.id}.m2m_aln" } ext.args = { "${params.lastal_args} ${params.lastal_extr_args}" } } - withName: 'LAST_DOTPLOT_O2O' { + withName: 'ALIGNMENT_DOTPLOT_O2O' { ext.prefix = { "${meta.id}.o2o_plt" } ext.args = { "--rot2=h --sort2=3 --strands2=1 ${params.dotplot_options}" } } - withName: 'LAST_DOTPLOT_M2O' { + withName: 
'ALIGNMENT_DOTPLOT_M2O' { ext.prefix = { "${meta.id}.m2o_plt" } ext.args = { "--rot2=h --sort2=3 --strands2=1 ${params.dotplot_options}" } } - withName: 'LAST_DOTPLOT_M2M' { + withName: 'ALIGNMENT_DOTPLOT_M2M' { ext.prefix = { "${meta.id}.m2m_plt" } ext.args = { "--rot2=h --sort2=3 --strands2=1 ${params.dotplot_options}" } } @@ -92,4 +92,12 @@ process { ] } + withName: 'MULTIQC_ASSEMBLYSCAN_PLOT_DATA' { + publishDir = [ + path: { "${params.outdir}/multiqc/assemblyscan_plot_data" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } diff --git a/docs/images/Homo_sapiens_GCA_000001405.29_GRCh38.p14___Macaca_mulatta_GCA_003339765.3.o2o_plt.png b/docs/images/Homo_sapiens_GCA_000001405.29_GRCh38.p14___Macaca_mulatta_GCA_003339765.3.o2o_plt.png new file mode 100644 index 0000000..96dc49c Binary files /dev/null and b/docs/images/Homo_sapiens_GCA_000001405.29_GRCh38.p14___Macaca_mulatta_GCA_003339765.3.o2o_plt.png differ diff --git a/docs/images/mqc_base_content_summary-pct.png b/docs/images/mqc_base_content_summary-pct.png new file mode 100644 index 0000000..96538f6 Binary files /dev/null and b/docs/images/mqc_base_content_summary-pct.png differ diff --git a/docs/images/mqc_contigs_length_statistics.png b/docs/images/mqc_contigs_length_statistics.png new file mode 100644 index 0000000..34c01ba Binary files /dev/null and b/docs/images/mqc_contigs_length_statistics.png differ diff --git a/docs/images/mqc_fastqc_adapter.png b/docs/images/mqc_fastqc_adapter.png deleted file mode 100755 index 361d0e4..0000000 Binary files a/docs/images/mqc_fastqc_adapter.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_counts.png b/docs/images/mqc_fastqc_counts.png deleted file mode 100755 index cb39ebb..0000000 Binary files a/docs/images/mqc_fastqc_counts.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_quality.png b/docs/images/mqc_fastqc_quality.png deleted file mode 100755 index a4b89bf..0000000 
Binary files a/docs/images/mqc_fastqc_quality.png and /dev/null differ diff --git a/docs/images/mqc_last_o2o-stats.png b/docs/images/mqc_last_o2o-stats.png new file mode 100644 index 0000000..7530c07 Binary files /dev/null and b/docs/images/mqc_last_o2o-stats.png differ diff --git a/docs/images/mqc_train-stats.png b/docs/images/mqc_train-stats.png new file mode 100644 index 0000000..0c76a3e Binary files /dev/null and b/docs/images/mqc_train-stats.png differ diff --git a/docs/images/pairgenomealign-tubemap.png b/docs/images/pairgenomealign-tubemap.png new file mode 100644 index 0000000..6c12352 Binary files /dev/null and b/docs/images/pairgenomealign-tubemap.png differ diff --git a/docs/images/pairgenomealign-tubemap.svg b/docs/images/pairgenomealign-tubemap.svg new file mode 100644 index 0000000..d467777 --- /dev/null +++ b/docs/images/pairgenomealign-tubemap.svg @@ -0,0 +1,1559 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Indexseeds + seqtk cutN + assemblyscan + Target genome + TrainParameters + Alignquery(ies)on target + Query genome(s) + + + + fasta + + + + + + + + + + fasta + + + + + + png + + + + + png + + + + + + + + + + + + + Postprocessing + Inputs QC + Dotplots + + Alignment + + + One-to-one + + + + Many-to-one + + + + Outputs + + + + modules + + + + + v1.0 + + + + + + + + + + + + + + + + + + + nf- + core/ + + + + + + One-to-many + + + + Target genome + + + + + Query genome(s) + + + + pairgenome + align + + + + Inputs + + + + + Many-to-many + + + + + + + + + maf + + + + + maf + + + + + + + + One-tomany + + + png + + + + + + + + + + + + maf + + + + + maf + + + + + + + + Many-tomany + + + png + + + + + + + + + maf + + + + + + + maf + + + + + + Many-toone + + + + png + + + + + png + + + + + + + + maf + + + + + + + + + + + + + + + + + + + + maf + + + + + One-toone + + + + + png + + + + + png + + + + + + diff --git a/docs/output.md b/docs/output.md index fa97956..f303490 100644 --- 
a/docs/output.md +++ b/docs/output.md @@ -11,18 +11,31 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: - [Alignments](#alignments) - Alignment of the _query_ genomes to the _target_ genome -- [Dot plots](#dot-plots) - Alignment of the _query_ genomes to the _target_ genome +- [Dot plots](#dot-plots) - Visualisation of the alignment of the _query_ genomes to the _target_ genome +- [`N` regions](#n-regions) - Coordinate of the `N` regions on the _query_ and _target_ genomes - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution Each _query_ genome, is aligned to the _target_ genome, and each alignment is visualised with dot plots. The output file names are constructed by concatenating the _target_ and _query_ sample identifiers with a `___` separator (three underscores), to faciliate re-extraction of the IDs from file names. +### Assembly statistics + +
+Output files + +- `assemblyscan/` + - `*.json` contains the statistics collected with the [`assembly-scan`](https://github.com/rpetit3/assembly-scan) software. + +
+ +Basic statistics on nucleotide content and contig length are collected for each aligned genome for later plotting with MultiQC. + ### Alignments 
Output files -- `last/` +- `alignment/` - `*.train` is the alignment parameters computed by `last-train` (optional) - `*.m2m_aln.maf.gz` is the _**many-to-many**_ alignment between _target_ and _query_ genomes. (optional through the `--m2m` option) - `*.m2o_aln.maf.gz` is the _**many-to-one**_ alignment regions of the _target_ genome are matched at most once by the _query_ genome. @@ -38,7 +51,7 @@ Genomes are aligned witn [`lastal`](https://gitlab.com/mcfrith/last/-/blob/main/
Output files -- `last/` +- `alignment/` - `*.m2m_plot` (optional) - `*.m2o_plot` (optional) - `*.o2o_plot` (optional) @@ -46,9 +59,22 @@ Genomes are aligned witn [`lastal`](https://gitlab.com/mcfrith/last/-/blob/main/
-Dot plots representing the pairwise genome alignments, produced with the [`last-dotplot`](https://gitlab.com/mcfrith/last/-/blob/main/doc/last-dotplot.rst) tool. +Dot plots representing the pairwise genome alignments, produced with the [`last-dotplot`](https://gitlab.com/mcfrith/last/-/blob/main/doc/last-dotplot.rst) tool. In the one-to-one alignment example below, the `hg38` human genome (_target_) is represented on the horizontal axis and a monkey genome (_Macaca mulatta_ accession number `GCA_003339765.3`) on the vertical axis (_query_). Regions containing unknown (`N`) sequences are on pink background. Forward (+/+) alignments are plotted in red and reverse (+/– or –/+) in blue. _Target_ (human) contigs are displayed in their original order. _Query_ contigs (monkey) are reordered and possibly reverse-complemented to diagonalise the plot as much as possible. The names of reverse-complemented contigs are printed in blue. -The poly-N regions longer than 9 bases in each genome sequence are marked in pale red in the dot-plots. These often indicate contig boundaries in scaffolds. This is done with `seqtk cutN` and its output is provided in the `seqtk` directory. +![Example of a dot-plot produced by the pipeline after aligning human and macaque genomes](images/Homo_sapiens_GCA_000001405.29_GRCh38.p14___Macaca_mulatta_GCA_003339765.3.o2o_plt.png "Human–Monkey comparison") + +### `N` regions + 
+Output files + +- `cutn/` + - `targetGenome.bed` + - `.bed` + +
+ +The poly-N regions longer than 9 bases in each genome sequence often indicate contig boundaries in scaffolds. Therefore, we marked them in pale red in the dot-plots. They are detected with the `seqtk cutN` command and its output (in 3-column BED format) is provided in the `cutn` directory. File names are constructed from the sample IDs, except for the _target_ genome which is always called `targetGenome` to avoid filename collisions. ### MultiQC @@ -59,12 +85,39 @@ The poly-N regions longer than 9 bases in each genome sequence are marked in pal - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - `multiqc_plots/`: directory containing static images from the report in various formats. + - `assemblyscan_plot_data`: GC content and contig length statistics parsed from `assemblyscan` for MultiQC with a local module. 
[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. -Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . +Results generated by MultiQC collate pipeline QC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . + +The example MultiQC plots below were generated on this pipeline's full test dataset, which aligns the `hg38` human genome to other primate genomes. + +#### Base content + +The pipeline reports the base content of every query genome, like in the example below: + +![Example of a base content report for primate genomes](images/mqc_base_content_summary-pct.png "Primate genome base content") + +#### Contig length statistics + +Contig length statistics can be displayed by MultiQC as violin plots. + +![Example of a contig length report for primate genomes](images/mqc_contigs_length_statistics.png "Contig length statistics") + +#### Training parameters + +Alignment parameters computed by `last-train` can be displayed by MultiQC as violin plots. + +![Example of alignment parameters for primate genomes aligned to the human genome](images/mqc_train-stats.png "Alignment parameters") + +#### Alignment + +Alignment statistics can be displayed by MultiQC as violin plots. 
+ +![Example of alignment statistics for primate genomes aligned to the human genome](images/mqc_last_o2o-stats.png "Alignment statistics") ### Pipeline information diff --git a/docs/usage.md b/docs/usage.md index fe08a72..cf91932 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -6,7 +6,7 @@ ## Introduction -You need at least two genomes, a _target_, which will be indexed, and one or more _queries_, which will be aligned to the _target_. Paths to the genome files for the _queries_ are passed with the _nf-core_ samplesheet `--input` system, and path to the genome file of the _target_ is passed with the `--target` parameter. Note that the computation is not symmetric: inverting _target_ and _query_ does not lead to strictly identical results. +You need at least two genomes, a _target_, which will be indexed, and one or more _queries_, which will be aligned to the _target_. Paths to the genome files for the _queries_ are passed as samplesheets through the `--input` parameter and the path to the genome file of the _target_ is passed with the `--target` parameter. Note that the computation is not symmetric: inverting _target_ and _query_ does not lead to strictly identical results. ## Input @@ -16,99 +16,36 @@ The target genome sequence is taken from a FASTA-formated file passed by the `-- ### Samplesheet for query genome(s) -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 2 columns, a header row and single or multiple sample rows (genome samples) as shown in the examples below. - -```bash ---input '[path to samplesheet file]' -``` +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use the `--input` parameter to specify its location. 
It has to be a comma-separated file with 2 columns, a header row and single or multiple sample rows (genome samples) as shown in the examples below. First, prepare a samplesheet with your input data that looks as follows: -`samplesheet.csv`: - -```csv +```csv title="samplesheet.csv" sample,fasta Query_1,query1_assembly.fasta +Query_2,query2_assembly.fasta +… ``` -Each row represents a fasta file, this can also contain multiple rows to accomodate multiple query genomes in fasta format. +Each row represents a fasta file. Use multiple rows as in the example above to accommodate multiple query genomes. | Column | Description | | -------- | -------------------------------------------------------------------------------------------- | | `sample` | Custom sample name. Spaces in sample names are automatically converted to underscores (`_`). | | `fasta` | Full path to Fasta/fa/gz file | -An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. +An [example samplesheet](../assets/samplesheet_full.csv) has been provided with the pipeline. ## Options -- `--seed` selects the name of the [LAST seed][] The default (`YASS`) searches - for “_long-and-weak similarities_” that “_allow for mismatches but not - gaps_”. Among alternatives, there are `NEAR` for “_short-and-strong - (near-identical) similarities_ … _with many gaps (insertions and deletions)_”, - `MAM8` to find _“weak - similarities with high sensitivity, but low speed and high memory usage”_ - or `RY128` that “_reduces run time and memory use, by only seeking seeds at - ~1/128 of positions in each sequence_”, which is useful when the purpose of - running this pipeline is only to generate whole-genome dotplots, or when - sensitivity for tiny fragments may be unnecessary or undesirable. Setting - the seed to `PSEUDO` triggers protein-to-DNA alignment mode (experimental). 
- -- `--lastal_args` defaults to `-C2 -D1e9` and is applied to both - the calls to `last-train` and `lastal`, like in the [LAST cookbook][] - and the [last-genome-alignments][] tutorial. - -- `--lastal_extr_args`is only passed to `lastal` and can be used for arguments - that are not recognised by `last-train`. - -- `--lastal_params`: path to a file containing alignment parameters - computed by [`last-train`][] or a [scoring matrix][]. If this option - is not used, the pipeline will run `last-train` for each query. - -- `--m2m`: (default: false) Compute and output the many-to-many alignment. - This adds time and can comsume considerable amount of space; use only - if you need that data. -- By default, `last-split` runs with `-m1e-5` to omit alignments with - mismap probability > 10−5, but this can be overriden with - the `--last_split_mismap` option. - -- The dotplots can be modified by overriding defaults and passing new - arguments via the `--dotplot_options` argument. Defaults and available - options can be seen on the manual page of the [`last-dotplot`][] program. - By default in this pipeline, the sequences of the _query_ genome are - sorted and oriented by their alignment to the _target_ genome - (`--sort2=3 --strands2=1`). For readability, their names are written - horizontally (`--rot2=h`). - -- Use `--skip_dotplot_m2m`, `--skip_dotplot_m2o`, `--skip_dotplot_o2o` - `--skip_dotplot_o2m` to skip the production of the dot plots that can be - computationally expensive and visually uninformative on large genomes with - shared repeats. File suffixes (see above) will not change. - -- By default the LAST index is named `target` and the ouput files are named - from the query IDs. Use the `--targetName` option to provide a name - that will be used for the LAST index and that will be prefixed to the - query IDs with a `___` separator. 
- -[`lastal`]: https://gitlab.com/mcfrith/last/-/blob/main/doc/lastal.rst -[`last-dotplot`]: https://gitlab.com/mcfrith/last/-/blob/main/doc/last-dotplot.rst -[LAST seed]: https://gitlab.com/mcfrith/last/-/blob/main/doc/last-seeds.rst -[LAST cookbook]: https://gitlab.com/mcfrith/last/-/blob/main/doc/last-cookbook.rst -[`last-train`]: https://gitlab.com/mcfrith/last/-/blob/main/doc/last-train.rst -[LAST tuning]: https://gitlab.com/mcfrith/last/-/blob/main/doc/last-tuning.rst -[scoring matrix]: https://gitlab.com/mcfrith/last/-/blob/main/doc/last-matrices.rst -[lastal documentation]: https://gitlab.com/mcfrith/last/-/blob/main/doc/lastal.rst -[last-genome-alignments]: https://github.com/mcfrith/last-genome-alignments +Please see the [parameter documentation](https://nf-co.re/pairgenomealign/parameters) for details. ## Fixed arguments (taken from the [LAST cookbook][] and the [LAST tuning][] manual) -- The `last-train` commands runs with `--revsym` as the DNA strands play equivalent roles in the studied genomes. +[LAST cookbook]: https://gitlab.com/mcfrith/last/-/blob/main/doc/last-cookbook.rst +[LAST tuning]: https://gitlab.com/mcfrith/last/-/blob/main/doc/last-tuning.rst -- The `lastdb` command takes all CPU cores available (`lastdb -P0`). Note that - when using more than one core, the output of `lastdb` may vary, causing the - order of tied positions to change. To prevent this to happen, you can pass a - nextflow parameter file to the pipeline that reduces the number of CPUs - allocated to `LAST_LASTDB` to 1. +- The `last-train` commands runs with `--revsym` as the DNA strands play equivalent roles in the studied genomes. 
## Running the pipeline diff --git a/modules/local/custommodule.nf b/modules/local/multiqc_assemblyscan_plot_data.nf similarity index 89% rename from modules/local/custommodule.nf rename to modules/local/multiqc_assemblyscan_plot_data.nf index d8dee21..a3fa099 100644 --- a/modules/local/custommodule.nf +++ b/modules/local/multiqc_assemblyscan_plot_data.nf @@ -1,10 +1,13 @@ -process CUSTOMMODULE { +process MULTIQC_ASSEMBLYSCAN_PLOT_DATA { label 'process_single' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/jq:1.6': 'biocontainers/jq:1.6' }" + // This module parses the JSON output of the assemblyscan module with jq to extract + // statistics about GC content and contig length. I do not know how to contribute + // this as a proper MultiQC module but feel free to do so! input: path(json) diff --git a/nextflow.config b/nextflow.config index 281c059..c75282a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -58,7 +58,6 @@ params { max_cpus = 16 max_time = '240.h' - // Others added seed = 'YASS' targetName = 'target' m2m = false @@ -259,7 +258,7 @@ manifest { description = """Pairwise alignment pipeline (genome to genome or reads to genome)""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.0dev' + version = '1.0.0' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index e85208e..312b1bd 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -27,14 +27,14 @@ "type": "string", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "format": "file-path", - "description": "Path to FASTA genome file for the target genome.", + "description": "Path or URL to a FASTA genome file for the _target_ genome.", "fa_icon": "far fa-file-code" }, "targetName": { "type": "string", "default": "target", - "help_text": "By default the LAST index is named `target` and the ouput files are named from the query IDs. 
Use this option to provide a name that will be used for the LAST index and that will be prefixed to the query IDs with a `___` separator.", - "description": "Target genome name" + "help_text": "By default the _target_ genome is named `target` and this name is concatenated with the sample IDs using `___` as a separator to construct alignment file names. Use this option to provide a more informative name for the target genome.", + "description": "Target genome name." }, "outdir": { "type": "string", @@ -64,14 +64,15 @@ "properties": { "m2m": { "type": "boolean", - "description": "make a many to many alignment", + "description": "Make a many to many alignment", + "help_text": "This adds time and can consume a considerable amount of space; use only if you need that data, for instance in the case of a self-alignment", "fa_icon": "fas fa-arrows-alt" }, "seed": { "type": "string", - "enum": ["YASS", "NEAR", "MAM8", "RY128", "PSEUDO"], + "enum": ["YASS", "NEAR", "MAM8", "RY128"], "help_text": "LAST creates a database of seed sequences in the _target_ genome, and provides different ways to generate these seeds. The default (`YASS`) searches for long-and-weak similarities that allow for mismatches but not gaps. Among alternatives, there are `NEAR` for short-and-strong (near-identical) similarities with many gaps (insertions and deletions), `MAM8` to find weak similarities with high sensitivity, but low speed and high memory usage, or `RY128` that reduces run time and memory use, by only seeking seeds at ~1/128 of positions in each sequence, which is useful when the purpose of running this pipeline is only to generate whole-genome dotplots, or when sensitivity for tiny fragments may be unnecessary or undesirable. 
See for details.", - "description": "Selects the name of the LAST seed.", + "description": "Select the LAST seed to index the _target_ genome.", "default": "YASS", "fa_icon": "fas fa-seedling" }, @@ -83,19 +84,19 @@ "lastal_args": { "type": "string", "default": "-C2 -D1e9", - "description": "Arguments passed to both last-train and lastal.", + "description": "Arguments passed to both `last-train` and `lastal`.", "fa_icon": "fas fa-align-center" }, "lastal_extr_args": { "type": "string", - "description": "(Arguments passed only to lastal (useful when they are not recognised by last-train).", + "description": "Arguments passed only to `lastal` (useful when they are not recognised by `last-train`).", "fa_icon": "fas fa-align-center" }, "last_split_mismap": { "type": "string", "default": 0.00001, "fa_icon": "fas fa-cut", - "description": "Mismap probability cutoff for `last-split`." } }, "fa_icon": "fas fa-cogs" @@ -108,7 +109,7 @@ "properties": { "dotplot_options": { "type": "string", - "description": "Extra arguments passed to the last-dotplot program to customise the output. See .", + "description": "Extra arguments passed to `last-dotplot` to customise the output. 
See .", "fa_icon": "fas fa-cog" }, "skip_dotplot_o2m": { diff --git a/subworkflows/local/pairalign_m2m/main.nf b/subworkflows/local/pairalign_m2m/main.nf index b1d34bf..1bbe385 100644 --- a/subworkflows/local/pairalign_m2m/main.nf +++ b/subworkflows/local/pairalign_m2m/main.nf @@ -4,16 +4,16 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { LAST_DOTPLOT as LAST_DOTPLOT_M2O } from '../../../modules/nf-core/last/dotplot/main' -include { LAST_DOTPLOT as LAST_DOTPLOT_M2M } from '../../../modules/nf-core/last/dotplot/main' -include { LAST_DOTPLOT as LAST_DOTPLOT_O2O } from '../../../modules/nf-core/last/dotplot/main' -include { LAST_DOTPLOT as LAST_DOTPLOT_O2M } from '../../../modules/nf-core/last/dotplot/main' -include { LAST_LASTAL as LAST_LASTAL_M2M } from '../../../modules/nf-core/last/lastal/main' -include { LAST_LASTDB } from '../../../modules/nf-core/last/lastdb/main' -include { LAST_SPLIT as LAST_SPLIT_M2O } from '../../../modules/nf-core/last/split/main' -include { LAST_SPLIT as LAST_SPLIT_O2O } from '../../../modules/nf-core/last/split/main' -include { LAST_SPLIT as LAST_SPLIT_O2M } from '../../../modules/nf-core/last/split/main' -include { LAST_TRAIN } from '../../../modules/nf-core/last/train/main' +include { LAST_DOTPLOT as ALIGNMENT_DOTPLOT_M2O } from '../../../modules/nf-core/last/dotplot/main' +include { LAST_DOTPLOT as ALIGNMENT_DOTPLOT_M2M } from '../../../modules/nf-core/last/dotplot/main' +include { LAST_DOTPLOT as ALIGNMENT_DOTPLOT_O2O } from '../../../modules/nf-core/last/dotplot/main' +include { LAST_DOTPLOT as ALIGNMENT_DOTPLOT_O2M } from '../../../modules/nf-core/last/dotplot/main' +include { LAST_LASTAL as ALIGNMENT_LASTAL_M2M } from '../../../modules/nf-core/last/lastal/main' +include { LAST_LASTDB as ALIGNMENT_LASTDB } from '../../../modules/nf-core/last/lastdb/main' +include { LAST_SPLIT as ALIGNMENT_SPLIT_M2O } from '../../../modules/nf-core/last/split/main' +include { LAST_SPLIT as 
ALIGNMENT_SPLIT_O2O } from '../../../modules/nf-core/last/split/main' +include { LAST_SPLIT as ALIGNMENT_SPLIT_O2M } from '../../../modules/nf-core/last/split/main' +include { LAST_TRAIN as ALIGNMENT_TRAIN } from '../../../modules/nf-core/last/train/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -33,29 +33,29 @@ workflow PAIRALIGN_M2M { // Index the target genome // - LAST_LASTDB ( + ALIGNMENT_LASTDB ( ch_target ) // Train alignment parameters // - LAST_TRAIN ( + ALIGNMENT_TRAIN ( ch_queries, - LAST_LASTDB.out.index.map { row -> row[1] } // Remove metadata map + ALIGNMENT_LASTDB.out.index.map { row -> row[1] } // Remove metadata map ) // Align queries to target. This is a many-to-many alignment // - LAST_LASTAL_M2M ( - ch_queries.join(LAST_TRAIN.out.param_file), - LAST_LASTDB.out.index.map { row -> row[1] } // Remove metadata map + ALIGNMENT_LASTAL_M2M ( + ch_queries.join(ALIGNMENT_TRAIN.out.param_file), + ALIGNMENT_LASTDB.out.index.map { row -> row[1] } // Remove metadata map ) // Optionally plot the many-to-many alignment // if (! (params.skip_dotplot_m2m) ) { - LAST_DOTPLOT_M2M ( - LAST_LASTAL_M2M.out.maf.join(ch_queries_bed), + ALIGNMENT_DOTPLOT_M2M ( + ALIGNMENT_LASTAL_M2M.out.maf.join(ch_queries_bed), ch_target_bed, 'png' ) @@ -63,12 +63,12 @@ workflow PAIRALIGN_M2M { // Compute the one-to-many alignment and optionally plot it // - LAST_SPLIT_O2M ( - LAST_LASTAL_M2M.out.maf + ALIGNMENT_SPLIT_O2M ( + ALIGNMENT_LASTAL_M2M.out.maf ) if (! (params.skip_dotplot_o2m) ) { - LAST_DOTPLOT_O2M ( - LAST_SPLIT_O2M.out.maf.join(ch_queries_bed), + ALIGNMENT_DOTPLOT_O2M ( + ALIGNMENT_SPLIT_O2M.out.maf.join(ch_queries_bed), ch_target_bed, 'png' ) @@ -76,12 +76,12 @@ workflow PAIRALIGN_M2M { // Compute the many-to-one alignment and optionally plot it // - LAST_SPLIT_M2O ( - LAST_LASTAL_M2M.out.maf + ALIGNMENT_SPLIT_M2O ( + ALIGNMENT_LASTAL_M2M.out.maf ) if (! 
(params.skip_dotplot_m2o) ) { - LAST_DOTPLOT_M2O ( - LAST_SPLIT_M2O.out.maf.join(ch_queries_bed), + ALIGNMENT_DOTPLOT_M2O ( + ALIGNMENT_SPLIT_M2O.out.maf.join(ch_queries_bed), ch_target_bed, 'png' ) @@ -89,12 +89,12 @@ workflow PAIRALIGN_M2M { // Compute the one-to-one alignment and optionally plot it // - LAST_SPLIT_O2O ( - LAST_SPLIT_M2O.out.maf + ALIGNMENT_SPLIT_O2O ( + ALIGNMENT_SPLIT_M2O.out.maf ) if (! (params.skip_dotplot_o2o) ) { - LAST_DOTPLOT_O2O ( - LAST_SPLIT_O2O.out.maf.join(ch_queries_bed), + ALIGNMENT_DOTPLOT_O2O ( + ALIGNMENT_SPLIT_O2O.out.maf.join(ch_queries_bed), ch_target_bed, 'png' ) @@ -103,13 +103,13 @@ workflow PAIRALIGN_M2M { emit: multiqc = Channel.empty() - .mix( LAST_TRAIN.out.multiqc.collect{ it[1]} ) - .mix(LAST_SPLIT_O2O.out.multiqc.collect{ it[1]} ) - m2m = LAST_LASTAL_M2M.out.maf - m2o = LAST_SPLIT_M2O.out.maf - o2m = LAST_SPLIT_O2M.out.maf - o2o = LAST_SPLIT_O2O.out.maf - versions = LAST_LASTDB.out.versions + .mix( ALIGNMENT_TRAIN.out.multiqc.collect{ it[1]} ) + .mix(ALIGNMENT_SPLIT_O2O.out.multiqc.collect{ it[1]} ) + m2m = ALIGNMENT_LASTAL_M2M.out.maf + m2o = ALIGNMENT_SPLIT_M2O.out.maf + o2m = ALIGNMENT_SPLIT_O2M.out.maf + o2o = ALIGNMENT_SPLIT_O2O.out.maf + versions = ALIGNMENT_LASTDB.out.versions } /* diff --git a/subworkflows/local/pairalign_m2o/main.nf b/subworkflows/local/pairalign_m2o/main.nf index 2846017..8d902d2 100644 --- a/subworkflows/local/pairalign_m2o/main.nf +++ b/subworkflows/local/pairalign_m2o/main.nf @@ -4,12 +4,12 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { LAST_DOTPLOT as LAST_DOTPLOT_M2O } from '../../../modules/nf-core/last/dotplot/main' -include { LAST_DOTPLOT as LAST_DOTPLOT_O2O } from '../../../modules/nf-core/last/dotplot/main' -include { LAST_LASTAL as LAST_LASTAL_M2O } from '../../../modules/nf-core/last/lastal/main' -include { LAST_LASTDB } from '../../../modules/nf-core/last/lastdb/main' -include { LAST_SPLIT as LAST_SPLIT_O2O } from 
'../../../modules/nf-core/last/split/main' -include { LAST_TRAIN } from '../../../modules/nf-core/last/train/main' +include { LAST_DOTPLOT as ALIGNMENT_DOTPLOT_M2O } from '../../../modules/nf-core/last/dotplot/main' +include { LAST_DOTPLOT as ALIGNMENT_DOTPLOT_O2O } from '../../../modules/nf-core/last/dotplot/main' +include { LAST_LASTAL as ALIGNMENT_LASTAL_M2O } from '../../../modules/nf-core/last/lastal/main' +include { LAST_LASTDB as ALIGNMENT_LASTDB } from '../../../modules/nf-core/last/lastdb/main' +include { LAST_SPLIT as ALIGNMENT_SPLIT_O2O } from '../../../modules/nf-core/last/split/main' +include { LAST_TRAIN as ALIGNMENT_TRAIN } from '../../../modules/nf-core/last/train/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -29,30 +29,30 @@ workflow PAIRALIGN_M2O { // Index the target genome // - LAST_LASTDB ( + ALIGNMENT_LASTDB ( ch_target ) // Train alignment parameters // - LAST_TRAIN ( + ALIGNMENT_TRAIN ( ch_queries, - LAST_LASTDB.out.index.map { row -> row[1] } // Remove metadata map + ALIGNMENT_LASTDB.out.index.map { row -> row[1] } // Remove metadata map ) // Align queries to target. // This directly computes a many-to-one alignment because of parameter modules // - LAST_LASTAL_M2O ( - ch_queries.join(LAST_TRAIN.out.param_file), - LAST_LASTDB.out.index.map { row -> row[1] } // Remove metadata map + ALIGNMENT_LASTAL_M2O ( + ch_queries.join(ALIGNMENT_TRAIN.out.param_file), + ALIGNMENT_LASTDB.out.index.map { row -> row[1] } // Remove metadata map ) // Optionally plot the many-to-one alignment // if (! 
(params.skip_dotplot_m2o) ) { - LAST_DOTPLOT_M2O ( - LAST_LASTAL_M2O.out.maf.join(ch_queries_bed), + ALIGNMENT_DOTPLOT_M2O ( + ALIGNMENT_LASTAL_M2O.out.maf.join(ch_queries_bed), ch_target_bed, 'png' ) @@ -60,12 +60,12 @@ workflow PAIRALIGN_M2O { // Compute the one-to-one alignment and optionally plot it // - LAST_SPLIT_O2O ( - LAST_LASTAL_M2O.out.maf + ALIGNMENT_SPLIT_O2O ( + ALIGNMENT_LASTAL_M2O.out.maf ) if (! (params.skip_dotplot_o2o) ) { - LAST_DOTPLOT_O2O ( - LAST_SPLIT_O2O.out.maf.join(ch_queries_bed), + ALIGNMENT_DOTPLOT_O2O ( + ALIGNMENT_SPLIT_O2O.out.maf.join(ch_queries_bed), ch_target_bed, 'png' ) @@ -74,11 +74,11 @@ workflow PAIRALIGN_M2O { emit: multiqc = Channel.empty() - .mix( LAST_TRAIN.out.multiqc.collect{ it[1]} ) - .mix(LAST_SPLIT_O2O.out.multiqc.collect{ it[1]} ) - m2o = LAST_LASTAL_M2O.out.maf - o2o = LAST_SPLIT_O2O.out.maf - versions = LAST_LASTDB.out.versions + .mix( ALIGNMENT_TRAIN.out.multiqc.collect{ it[1]} ) + .mix(ALIGNMENT_SPLIT_O2O.out.multiqc.collect{ it[1]} ) + m2o = ALIGNMENT_LASTAL_M2O.out.maf + o2o = ALIGNMENT_SPLIT_O2O.out.maf + versions = ALIGNMENT_LASTDB.out.versions } /* diff --git a/workflows/pairgenomealign.nf b/workflows/pairgenomealign.nf index d349ee2..0b289f9 100644 --- a/workflows/pairgenomealign.nf +++ b/workflows/pairgenomealign.nf @@ -4,15 +4,15 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { ASSEMBLYSCAN } from '../modules/nf-core/assemblyscan/main' -include { CUSTOMMODULE } from '../modules/local/custommodule' -include { PAIRALIGN_M2M } from '../subworkflows/local/pairalign_m2m/main' -include { SEQTK_CUTN as SEQTK_CUTN_TARGET } from '../modules/nf-core/seqtk/cutn/main' -include { SEQTK_CUTN as SEQTK_CUTN_QUERY } from '../modules/nf-core/seqtk/cutn/main' -include { PAIRALIGN_M2O } from '../subworkflows/local/pairalign_m2o/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-validation' -include { 
paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { ASSEMBLYSCAN } from '../modules/nf-core/assemblyscan/main' +include { MULTIQC_ASSEMBLYSCAN_PLOT_DATA } from '../modules/local/multiqc_assemblyscan_plot_data' +include { PAIRALIGN_M2M } from '../subworkflows/local/pairalign_m2m/main' +include { SEQTK_CUTN as CUTN_TARGET } from '../modules/nf-core/seqtk/cutn/main' +include { SEQTK_CUTN as CUTN_QUERY } from '../modules/nf-core/seqtk/cutn/main' +include { PAIRALIGN_M2O } from '../subworkflows/local/pairalign_m2o/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_pairgenomealign_pipeline' @@ -35,11 +35,11 @@ workflow PAIRGENOMEALIGN { // Extract coordinates of poly-N regions; they are often contig boundaries in scaffolds // - SEQTK_CUTN_TARGET ( + CUTN_TARGET ( // Avoid file name conflicts when target genome is also in the list of queries ch_targetgenome.map { meta, file -> [ [id:'targetGenome'] , file ] } ) - SEQTK_CUTN_QUERY ( + CUTN_QUERY ( ch_samplesheet ) @@ -49,7 +49,7 @@ workflow PAIRGENOMEALIGN { ch_samplesheet ) // Parse assembly-scan's JSON for MultiQC - CUSTOMMODULE ( + MULTIQC_ASSEMBLYSCAN_PLOT_DATA ( ASSEMBLYSCAN.out.json.collect{it[1]} ) @@ -57,7 +57,7 @@ workflow PAIRGENOMEALIGN { // ch_samplesheet = ch_samplesheet .map { row -> [ [id: params.targetName + '___' + row[0].id] , row.tail() ] } - ch_seqtk_cutn_query = SEQTK_CUTN_QUERY.out.bed + ch_seqtk_cutn_query = CUTN_QUERY.out.bed .map { row -> [ [id: params.targetName + '___' + row[0].id] , row.tail() ] } // Align with either the many-to-many or the many-to-one subworkflow @@ -67,7 +67,7 @@ workflow PAIRGENOMEALIGN { PAIRALIGN_M2O ( ch_targetgenome, 
ch_samplesheet, - SEQTK_CUTN_TARGET.out.bed, + CUTN_TARGET.out.bed, ch_seqtk_cutn_query ) pairalign_out = PAIRALIGN_M2O.out @@ -75,7 +75,7 @@ workflow PAIRGENOMEALIGN { PAIRALIGN_M2M ( ch_targetgenome, ch_samplesheet, - SEQTK_CUTN_TARGET.out.bed, + CUTN_TARGET.out.bed, ch_seqtk_cutn_query ) pairalign_out = PAIRALIGN_M2M.out @@ -85,9 +85,9 @@ workflow PAIRGENOMEALIGN { // ch_versions = ch_versions - .mix(SEQTK_CUTN_TARGET.out.versions) - .mix( ASSEMBLYSCAN.out.versions) - .mix( pairalign_out.versions) + .mix( CUTN_TARGET.out.versions) + .mix(ASSEMBLYSCAN.out.versions) + .mix( pairalign_out.versions) softwareVersionsToYAML(ch_versions) .collectFile( @@ -121,7 +121,7 @@ workflow PAIRGENOMEALIGN { ch_multiqc_files = ch_multiqc_files .mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - .mix(CUSTOMMODULE.out.tsv) + .mix(MULTIQC_ASSEMBLYSCAN_PLOT_DATA.out.tsv) .mix(pairalign_out.multiqc) .mix(ch_collated_versions) .mix(