From b7ce998c43bddc7fecca9d75a9951734f6f1f24b Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Fri, 15 Sep 2023 13:33:06 +0100 Subject: [PATCH 01/20] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ee344d9..6923b75 100755 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ Now, you can run the pipeline using: // For ALL_FILES run nextflow run sanger-tol/curationpretext \ -profile \ - --fasta path/to/assembly.fa \ + --input path/to/assembly.fa \ --cram path/to/cram/ \ --pacbio path/to/pacbio/fasta/ \ --teloseq TTAGGG \ @@ -71,7 +71,7 @@ nextflow run sanger-tol/curationpretext \ // For MAPS_ONLY run nextflow run sanger-tol/curationpretext \ -profile \ - --fasta path/to/assembly.fa \ + --input path/to/assembly.fa \ --cram path/to/cram/ \ --sample { default is "pretext_rerun" } -entry MAPS_ONLY \ From 28abc047c60c40c97d5b4f14d32c0dadad308467 Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Tue, 19 Sep 2023 10:44:36 +0100 Subject: [PATCH 02/20] Update usage.md Updating to add more information on running the pipeline --- docs/usage.md | 78 ++++++++++++++++++--------------------------------- 1 file changed, 28 insertions(+), 50 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 9ced252..8b3ea56 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -6,68 +6,45 @@ ## Introduction - +This is a sister pipeline to [TreeVal](https://github.com/sanger-tol/treeval/) which generated a plurality of data for the curation of reference-quality genomes. curationpretext is a subset of TreeVal that produces soley the Pretext maps and accessory files -## Samplesheet input +Currently, the pipeline expects input data to be in a specific format. +The `--input` should be `.fasta` or `.fa` (the same format but differing suffix). 
+The `--cram` should point to the folder containing `.cram` files along with a `.crai` per `.cram`. +The `--pacbio` should point to the folder containing `.fasta.gz` files. -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +If you do not have these file formats we have also included instructions on converting from common formats to our preferred format. +If there is a popular public preference for a particular format, we can modify the pipeline to utilise those formats. Just submit an issue. -```bash ---input '[path to samplesheet file]' -``` - -### Multiple runs of the same sample - -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: - -```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz -``` - -### Full samplesheet - -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. - -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. 
- -```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, -``` - -| Column | Description | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +The conversion documentation can be found [HERE](conversion_docs.md). -An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. +The pipeline currently does not ingest the accessory files into the pretext map for you, however we have included this code [HERE](https://github.com/sanger-tol/treeval/wiki/Ingesting-PreText-Accessory-Files). 
## Running the pipeline The typical command for running the pipeline is as follows: ```bash -nextflow run sanger-tol/curationpretext --input samplesheet.csv --outdir --genome GRCh37 -profile docker +nextflow run sanger-tol/curationpretext \ + --input { input.fasta } \ + --cram { path/to/cram/ } \ + --pacbio { path/to/pacbio/fasta/ } \ + --sample { default is "pretext_rerun" } \ + --teloseq TTAGGG \ + --outdir { OUTDIR } \ + -profile \ + -entry \ ``` -This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. +Above arguments surrounded with `{}` are user-defined values, those in `<>` are choices made between the shown values. Note that the pipeline will create the following files in your working directory: ```bash -work # Directory containing the nextflow working files - # Finished results in specified location (defined with --outdir) -.nextflow_log # Log file from Nextflow +work # Directory containing the nextflow working files +/pipeline_info # Finished results in specified location (defined with --outdir) +/hic_files # Finished results in specified location (defined with --outdir) +.nextflow_log # Log file from Nextflow # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` @@ -79,7 +56,7 @@ Pipeline settings can be provided in a `yaml` or `json` file via `-params-file < > The above pipeline run specified with a params file in yaml format: ```bash -nextflow run sanger-tol/curationpretext -profile docker -params-file params.yaml +nextflow run sanger-tol/curationpretext -profile docker -params-file params.yaml -entry ``` with `params.yaml` containing: @@ -87,9 +64,10 @@ with `params.yaml` containing: ```yaml input: './samplesheet.csv' outdir: './results/' -genome: 'GRCh37' -input: 'data' -<...> +teloseq: 'GRCh37' +sample: 'data' +pacbio: 'pacbio_path' +cram: 'cram_path' ``` You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). 
From 215c2e657ff04eeffd97d7659b30f693946102c6 Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Tue, 19 Sep 2023 10:46:17 +0100 Subject: [PATCH 03/20] Update usage.md --- docs/usage.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 8b3ea56..76f8a2a 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -16,9 +16,9 @@ The `--pacbio` should point to the folder containing `.fasta.gz` files. If you do not have these file formats we have also included instructions on converting from common formats to our preferred format. If there is a popular public preference for a particular format, we can modify the pipeline to utilise those formats. Just submit an issue. -The conversion documentation can be found [HERE](conversion_docs.md). +The conversion documentation can be found in the [TreeVal Wiki](https://github.com/sanger-tol/treeval/wiki/Data-Preparation). -The pipeline currently does not ingest the accessory files into the pretext map for you, however we have included this code [HERE](https://github.com/sanger-tol/treeval/wiki/Ingesting-PreText-Accessory-Files). +The pipeline currently does not ingest the accessory files into the pretext map for you, however, we have included this code in the [TreeVal Wiki](https://github.com/sanger-tol/treeval/wiki/Ingesting-PreText-Accessory-Files). 
## Running the pipeline From fdb7c3f32a175e29fd0bc0a654c246201a0ad524 Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Tue, 19 Sep 2023 10:55:52 +0100 Subject: [PATCH 04/20] Update usage.md --- docs/usage.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 76f8a2a..520f53e 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -9,16 +9,22 @@ This is a sister pipeline to [TreeVal](https://github.com/sanger-tol/treeval/) which generated a plurality of data for the curation of reference-quality genomes. curationpretext is a subset of TreeVal that produces soley the Pretext maps and accessory files Currently, the pipeline expects input data to be in a specific format. + The `--input` should be `.fasta` or `.fa` (the same format but differing suffix). + The `--cram` should point to the folder containing `.cram` files along with a `.crai` per `.cram`. + The `--pacbio` should point to the folder containing `.fasta.gz` files. If you do not have these file formats we have also included instructions on converting from common formats to our preferred format. If there is a popular public preference for a particular format, we can modify the pipeline to utilise those formats. Just submit an issue. -The conversion documentation can be found in the [TreeVal Wiki](https://github.com/sanger-tol/treeval/wiki/Data-Preparation). +The conversion documentation can be found in: + + - [TreeVal Wiki - PacBio Data](https://github.com/sanger-tol/treeval/wiki/Data-Preparation#pacbio-data). + - [TreeVal Wiki - Cram Data](https://github.com/sanger-tol/treeval/wiki/Data-Preparation#hic-data). -The pipeline currently does not ingest the accessory files into the pretext map for you, however, we have included this code in the [TreeVal Wiki](https://github.com/sanger-tol/treeval/wiki/Ingesting-PreText-Accessory-Files). 
+The pipeline currently does not ingest the accessory files into the pretext map for you, however, we have included this code in the [TreeVal Wiki - Ingesting PreText Accessory Files](https://github.com/sanger-tol/treeval/wiki/Ingesting-PreText-Accessory-Files). ## Running the pipeline From 087f3a92ea3518affce2ba957e7fe93c02ee5435 Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Tue, 19 Sep 2023 11:00:11 +0100 Subject: [PATCH 05/20] Update README.md --- README.md | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 6923b75..cd0ef15 100755 --- a/README.md +++ b/README.md @@ -12,16 +12,16 @@ ## Introduction -**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with [TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes. +**sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with [TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes. -This is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. However, can be simply used to generate pretext maps. +This is intended as a supplementary pipeline for the [treeval](https://github.com/sanger-tol/treeval) project. This pipeline can be simply used to generate pretext maps, information on how to run this pipeline can be found in the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage). -1. Generate Maps - Generates pretext maps aswell as a static image. +1. Generate Maps - Generates pretext maps as well as a static image. -2. 
Accessory files - Generates the repeat density, gap, telomere and coverage tracks. +2. Accessory files - Generates the repeat density, gap, telomere, and coverage tracks. ## Usage @@ -34,19 +34,19 @@ Currently, the pipeline uses the following flags: - --input - - The absolute path to the assembled genome in, e.g, `/path/to/assembly.fa` + - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa` - --pacbio - - The directory of the fasta files generated from pacbio reads, e.g, `/path/to/fasta/` + - The directory of the fasta files generated from pacbio reads, e.g., `/path/to/fasta/` - --cram - - The directory of the cram _and_ cram.crai files, e.g, `/path/to/cram/` + - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/` - --teloseq - - A telomeric sequence, e.g, `TTAGGG` + - A telomeric sequence, e.g., `TTAGGG` - -entry - ALL_FILES generates all accessory files as well as pretext maps @@ -81,15 +81,14 @@ nextflow run sanger-tol/curationpretext \ > **Warning:** > Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those > provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; -> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -For more details, please refer to the [usage documentation](https://nf-co.re/curationpretext/usage) and the [parameter documentation](https://nf-co.re/curationpretext/parameters). +For more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/parameters). ## Pipeline output -To see the the results of a test run with a full size dataset refer to the [results](https://nf-co.re/curationpretext/results) tab on the nf-core website pipeline page. 
+To see the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the nf-core website pipeline page. For more details about the output files and reports, please refer to the -[output documentation](https://nf-co.re/curationpretext/output). +[output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output). ## Credits From d7801e2b316389c113812297332bab586d616161 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 19 Sep 2023 11:31:42 +0100 Subject: [PATCH 06/20] Adding testing --- .github/workflows/ci.yml | 20 ++++++++++++++++---- docs/usage.md | 20 ++++++++++---------- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 017d244..bd8c7da 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,9 +35,21 @@ jobs: with: version: "${{ matrix.NXF_VER }}" - - name: Run pipeline with test data - # TODO nf-core: You can customise CI pipeline run tests as required - # For example: adding multiple test runs with different parameters + - name: Download test data + # Download a fungal test data set that is full enough to show some real output. 
+ run: | + curl https://dp24.cog.sanger.ac.uk/TreeValTinyData.tar.gz | tar xzf - + + - name: Tree test data + run: | + tree TreeValTinyData + + - name: Run ONLY_MAPS pipeline with test data + # Remember that you can parallelise this by using strategy.matrix + run: | + nextflow run ${GITHUB_WORKSPACE} --input /TreeValTinyData/assembly/draft/grTriPseu1.fa --pacbio /TreeValTinyData/genomic_data/pacbio/ --cram /TreeValTinyData/genomic_data/hic-arima/ --teloseq TTAGGG -profile test,docker --outdir ./results -entry ONLY_MAPS + + - name: Run ALL_FILES pipeline with test data # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + nextflow run ${GITHUB_WORKSPACE} --input /TreeValTinyData/assembly/draft/grTriPseu1.fa --pacbio /TreeValTinyData/genomic_data/pacbio/ --cram /TreeValTinyData/genomic_data/hic-arima/ --teloseq TTAGGG -profile test,docker --outdir ./results -entry ALL_FILES diff --git a/docs/usage.md b/docs/usage.md index 520f53e..adf57f6 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -20,9 +20,9 @@ If you do not have these file formats we have also included instructions on conv If there is a popular public preference for a particular format, we can modify the pipeline to utilise those formats. Just submit an issue. The conversion documentation can be found in: - - - [TreeVal Wiki - PacBio Data](https://github.com/sanger-tol/treeval/wiki/Data-Preparation#pacbio-data). - - [TreeVal Wiki - Cram Data](https://github.com/sanger-tol/treeval/wiki/Data-Preparation#hic-data). + +- [TreeVal Wiki - PacBio Data](https://github.com/sanger-tol/treeval/wiki/Data-Preparation#pacbio-data). +- [TreeVal Wiki - Cram Data](https://github.com/sanger-tol/treeval/wiki/Data-Preparation#hic-data). 
The pipeline currently does not ingest the accessory files into the pretext map for you, however, we have included this code in the [TreeVal Wiki - Ingesting PreText Accessory Files](https://github.com/sanger-tol/treeval/wiki/Ingesting-PreText-Accessory-Files). @@ -36,7 +36,7 @@ nextflow run sanger-tol/curationpretext \ --cram { path/to/cram/ } \ --pacbio { path/to/pacbio/fasta/ } \ --sample { default is "pretext_rerun" } \ - --teloseq TTAGGG \ + --teloseq {TTAGGG} \ --outdir { OUTDIR } \ -profile \ -entry \ @@ -68,12 +68,12 @@ nextflow run sanger-tol/curationpretext -profile docker -params-file params.yaml with `params.yaml` containing: ```yaml -input: './samplesheet.csv' -outdir: './results/' -teloseq: 'GRCh37' -sample: 'data' -pacbio: 'pacbio_path' -cram: 'cram_path' +input: "./assembly.fa" +outdir: "./results/" +teloseq: "TTAGGG" +sample: "data" +pacbio: "pacbio_path" +cram: "cram_path" ``` You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). 
From 7131d3f5bb195774aa5e891a9fecd773d2b67bd4 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 19 Sep 2023 11:36:56 +0100 Subject: [PATCH 07/20] Adding testing --- .github/workflows/ci.yml | 4 ++-- conf/test.config | 7 ------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bd8c7da..ca1acbc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,9 +47,9 @@ jobs: - name: Run ONLY_MAPS pipeline with test data # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} --input /TreeValTinyData/assembly/draft/grTriPseu1.fa --pacbio /TreeValTinyData/genomic_data/pacbio/ --cram /TreeValTinyData/genomic_data/hic-arima/ --teloseq TTAGGG -profile test,docker --outdir ./results -entry ONLY_MAPS + nextflow run ${GITHUB_WORKSPACE} --input /home/runner/work/curationpretext/curationpretext/TreeValTinyData/assembly/draft/grTriPseu1.fa --pacbio /home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/pacbio/ --cram /home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/hic-arima/ --teloseq TTAGGG -profile test,docker --outdir ./results -entry ONLY_MAPS - name: Run ALL_FILES pipeline with test data # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} --input /TreeValTinyData/assembly/draft/grTriPseu1.fa --pacbio /TreeValTinyData/genomic_data/pacbio/ --cram /TreeValTinyData/genomic_data/hic-arima/ --teloseq TTAGGG -profile test,docker --outdir ./results -entry ALL_FILES + nextflow run ${GITHUB_WORKSPACE} --input /home/runner/work/curationpretext/curationpretext/TreeValTinyData/assembly/draft/grTriPseu1.fa --pacbio /home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/pacbio/ --cram /home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/hic-arima/ --teloseq TTAGGG -profile test,docker --outdir 
./results -entry ALL_FILES diff --git a/conf/test.config b/conf/test.config index 05543d2..8c89899 100755 --- a/conf/test.config +++ b/conf/test.config @@ -19,11 +19,4 @@ params { max_memory = '6.GB' max_time = '6.h' - // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' - - // Genome references - genome = 'R64-1-1' } From d80247641d47caec4404868610c41b71a26685ff Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 19 Sep 2023 11:40:44 +0100 Subject: [PATCH 08/20] Adding testing --- .github/workflows/ci.yml | 4 ++-- docs/usage.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ca1acbc..6594696 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,10 +44,10 @@ jobs: run: | tree TreeValTinyData - - name: Run ONLY_MAPS pipeline with test data + - name: Run MAPS_ONLY pipeline with test data # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} --input /home/runner/work/curationpretext/curationpretext/TreeValTinyData/assembly/draft/grTriPseu1.fa --pacbio /home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/pacbio/ --cram /home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/hic-arima/ --teloseq TTAGGG -profile test,docker --outdir ./results -entry ONLY_MAPS + nextflow run ${GITHUB_WORKSPACE} --input /home/runner/work/curationpretext/curationpretext/TreeValTinyData/assembly/draft/grTriPseu1.fa --pacbio /home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/pacbio/ --cram /home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/hic-arima/ --teloseq TTAGGG 
-profile test,docker --outdir ./results -entry MAPS_ONLY - name: Run ALL_FILES pipeline with test data # Remember that you can parallelise this by using strategy.matrix diff --git a/docs/usage.md b/docs/usage.md index adf57f6..6df1186 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -62,7 +62,7 @@ Pipeline settings can be provided in a `yaml` or `json` file via `-params-file < > The above pipeline run specified with a params file in yaml format: ```bash -nextflow run sanger-tol/curationpretext -profile docker -params-file params.yaml -entry +nextflow run sanger-tol/curationpretext -profile docker -params-file params.yaml -entry ``` with `params.yaml` containing: From 6ac317792c1686a3c19dc060ece522cf5a8555f9 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 19 Sep 2023 12:00:52 +0100 Subject: [PATCH 09/20] Adding testing --- .github/workflows/ci.yml | 4 ++-- conf/test.config | 14 ++++++++++---- subworkflows/local/generate_maps.nf | 25 ++++++++++++++++++------- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6594696..3c88322 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,9 +47,9 @@ jobs: - name: Run MAPS_ONLY pipeline with test data # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} --input /home/runner/work/curationpretext/curationpretext/TreeValTinyData/assembly/draft/grTriPseu1.fa --pacbio /home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/pacbio/ --cram /home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/hic-arima/ --teloseq TTAGGG -profile test,docker --outdir ./results -entry MAPS_ONLY + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results -entry MAPS_ONLY - name: Run ALL_FILES pipeline with test data # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} --input 
/home/runner/work/curationpretext/curationpretext/TreeValTinyData/assembly/draft/grTriPseu1.fa --pacbio /home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/pacbio/ --cram /home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/hic-arima/ --teloseq TTAGGG -profile test,docker --outdir ./results -entry ALL_FILES + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results -entry ALL_FILES diff --git a/conf/test.config b/conf/test.config index 8c89899..85dbba3 100755 --- a/conf/test.config +++ b/conf/test.config @@ -11,12 +11,18 @@ */ params { - config_profile_name = 'Test profile' + config_profile_name = 'GitHub Test profile' config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + input = /home/runner/work/curationpretext/curationpretext/TreeValTinyData/assembly/draft/grTriPseu1.fa + outdir = ./results + pacbio = /home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/pacbio/ + cram = /home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/hic-arima/ + sample = CurationPretextTest + teloseq = TTAGGG } diff --git a/subworkflows/local/generate_maps.nf b/subworkflows/local/generate_maps.nf index 10b26e4..1ca44f1 100755 --- a/subworkflows/local/generate_maps.nf +++ b/subworkflows/local/generate_maps.nf @@ -134,13 +134,24 @@ workflow GENERATE_MAPS { ch_versions = ch_versions.mix(PRETEXTMAP_STANDRD.out.versions) // - // MODULE: GENERATE PRETEXT MAP FROM MAPPED BAM FOR HIGH RES - // - PRETEXTMAP_HIGHRES ( - pretext_input.input_bam, - pretext_input.reference - ) - ch_versions = ch_versions.mix(PRETEXTMAP_HIGHRES.out.versions) + // LOGIC: HIRES IS TOO INTENSIVE FOR RUNNING IN GITHUB CI SO THIS STOPS IT RUNNING + // + if ( params.config_profile_name ) { + config_profile_name = 
params.config_profile_name + } else { + config_profile_name = 'Local' + } + + if ( !config_profile_name.contains('GitHub') ) { + // + // MODULE: GENERATE PRETEXT MAP FROM MAPPED BAM FOR HIGH RES + // + PRETEXTMAP_HIGHRES ( + pretext_input.input_bam, + pretext_input.reference + ) + ch_versions = ch_versions.mix( PRETEXTMAP_HIGHRES.out.versions ) + } // // MODULE: GENERATE PNG FROM STANDARD PRETEXT From 74d7bf83756fd5cf52cff89fa57d6c03ccd3d7dd Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 19 Sep 2023 12:03:20 +0100 Subject: [PATCH 10/20] Adding testing --- conf/test.config | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/conf/test.config b/conf/test.config index 85dbba3..ca32632 100755 --- a/conf/test.config +++ b/conf/test.config @@ -19,10 +19,10 @@ params { max_memory = '6.GB' max_time = '6.h' - input = /home/runner/work/curationpretext/curationpretext/TreeValTinyData/assembly/draft/grTriPseu1.fa - outdir = ./results - pacbio = /home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/pacbio/ - cram = /home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/hic-arima/ - sample = CurationPretextTest - teloseq = TTAGGG + input = '/home/runner/work/curationpretext/curationpretext/TreeValTinyData/assembly/draft/grTriPseu1.fa' + outdir = './results' + pacbio = '/home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/pacbio/' + cram = '/home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/hic-arima/' + sample = 'CurationPretextTest' + teloseq = 'TTAGGG' } From f055b3610e27b8af18a10b582f338d39f3a56809 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 19 Sep 2023 12:05:14 +0100 Subject: [PATCH 11/20] Adding testing --- subworkflows/local/generate_maps.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/generate_maps.nf b/subworkflows/local/generate_maps.nf index 1ca44f1..33c14ed 100755 --- 
a/subworkflows/local/generate_maps.nf +++ b/subworkflows/local/generate_maps.nf @@ -173,7 +173,7 @@ workflow GENERATE_MAPS { emit: standrd_pretext = PRETEXTMAP_STANDRD.out.pretext standrd_snpshot = SNAPSHOT_SRES.out.image - highres_pretext = PRETEXTMAP_HIGHRES.out.pretext + //highres_pretext = PRETEXTMAP_HIGHRES.out.pretext //highres_snpshot = SNAPSHOT_HRES.out.image versions = ch_versions.ifEmpty(null) From 1956859b8149630e2c1a42b3fcdbe81d7f4f6cda Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 19 Sep 2023 12:49:18 +0100 Subject: [PATCH 12/20] Attempting to fix testing --- conf/test.config | 12 ++++++------ subworkflows/local/generate_maps.nf | 2 +- workflows/curationpretext_allf.nf | 1 + 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/conf/test.config b/conf/test.config index ca32632..2d696df 100755 --- a/conf/test.config +++ b/conf/test.config @@ -19,10 +19,10 @@ params { max_memory = '6.GB' max_time = '6.h' - input = '/home/runner/work/curationpretext/curationpretext/TreeValTinyData/assembly/draft/grTriPseu1.fa' - outdir = './results' - pacbio = '/home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/pacbio/' - cram = '/home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/hic-arima/' - sample = 'CurationPretextTest' - teloseq = 'TTAGGG' + input = "/home/runner/work/curationpretext/curationpretext/TreeValTinyData/assembly/draft/grTriPseu1.fa" + outdir = "./results" + pacbio = "/home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/pacbio/" + cram = "/home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/hic-arima/" + sample = "CurationPretextTest" + teloseq = "TTAGGG" } diff --git a/subworkflows/local/generate_maps.nf b/subworkflows/local/generate_maps.nf index 33c14ed..8b9c84f 100755 --- a/subworkflows/local/generate_maps.nf +++ b/subworkflows/local/generate_maps.nf @@ -74,7 +74,7 @@ workflow GENERATE_MAPS { bwa_path.toString() + '/' + 
ref_dir.toString().split('/')[-1] ) } - .set { ch_filtering_input } + .set { ch_filtering_input } // // MODULE: parallel proccessing bwa-mem2 alignment by given interval of containers from cram files diff --git a/workflows/curationpretext_allf.nf b/workflows/curationpretext_allf.nf index 93c6824..ec6f946 100755 --- a/workflows/curationpretext_allf.nf +++ b/workflows/curationpretext_allf.nf @@ -92,6 +92,7 @@ workflow.onComplete { if (params.hook_url) { NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) } + // TreeValProject.summary(workflow, reference_tuple, summary_params, projectDir) } /* From 967d21d072c4e38298640a31217c41a9982ee660 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 19 Sep 2023 12:54:10 +0100 Subject: [PATCH 13/20] Attempting to fix testing --- modules/local/bamtobed_sort.nf | 2 +- modules/local/cram_filter_align_bwamem2_fixmate_sort.nf | 2 +- modules/local/extract_repeat.nf | 2 +- modules/local/findhalfcoverage.nf | 2 +- modules/local/graphoverallcoverage.nf | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/local/bamtobed_sort.nf b/modules/local/bamtobed_sort.nf index 3554159..7260ac4 100755 --- a/modules/local/bamtobed_sort.nf +++ b/modules/local/bamtobed_sort.nf @@ -4,7 +4,7 @@ process BAMTOBED_SORT { container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mulled-v2-9d3a458f6420e5712103ae2af82c94d26d63f059:60b54b43045e8cf39ba307fd683c69d4c57240ce-0' : - 'quay.io/biocontainers/mulled-v2-9d3a458f6420e5712103ae2af82c94d26d63f059:60b54b43045e8cf39ba307fd683c69d4c57240ce-0' }" + 'biocontainers/mulled-v2-9d3a458f6420e5712103ae2af82c94d26d63f059:60b54b43045e8cf39ba307fd683c69d4c57240ce-0' }" input: tuple val(meta), path(bam) diff --git a/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf b/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf index d275dd4..3f15dfc 100755 --- a/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf +++ b/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf @@ -4,7 +4,7 @@ process CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-50d89b457e04ed90fa0cbf8ebc3ae1b9ffbc836b:caf993da1689e8d42f5e4c113ffc9ef81d26df96-0' : - 'quay.io/biocontainers/mulled-v2-50d89b457e04ed90fa0cbf8ebc3ae1b9ffbc836b:caf993da1689e8d42f5e4c113ffc9ef81d26df96-0' }" + 'biocontainers/mulled-v2-50d89b457e04ed90fa0cbf8ebc3ae1b9ffbc836b:caf993da1689e8d42f5e4c113ffc9ef81d26df96-0' }" input: tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(bwaprefix) diff --git a/modules/local/extract_repeat.nf b/modules/local/extract_repeat.nf index 13f2898..8258a30 100755 --- a/modules/local/extract_repeat.nf +++ b/modules/local/extract_repeat.nf @@ -5,7 +5,7 @@ process EXTRACT_REPEAT { conda "conda-forge::perl=5.26.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/perl:5.26.2' : - 'quay.io/biocontainers/perl:5.26.2' }" + 'biocontainers/perl:5.26.2' }" input: tuple val( meta ), path( file ) diff --git a/modules/local/findhalfcoverage.nf b/modules/local/findhalfcoverage.nf index 5659281..529f543 100755 --- a/modules/local/findhalfcoverage.nf +++ b/modules/local/findhalfcoverage.nf @@ -5,7 +5,7 @@ process FINDHALFCOVERAGE { conda "conda-forge::python=3.9" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.9' : - 'quay.io/biocontainers/python:3.9' }" + 'biocontainers/python:3.9' }" input: tuple val(meta), path(bedfile) diff --git a/modules/local/graphoverallcoverage.nf b/modules/local/graphoverallcoverage.nf index 3d9b054..217f1f5 100755 --- a/modules/local/graphoverallcoverage.nf +++ b/modules/local/graphoverallcoverage.nf @@ -5,7 +5,7 @@ process GRAPHOVERALLCOVERAGE { conda "conda-forge::perl=5.26.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/perl:5.26.2' : - 'quay.io/biocontainers/perl:5.26.2' }" + 'biocontainers/perl:5.26.2' }" input: tuple val(meta), path(bed) From 0ff9bd6a50d6c705d19e83353944f6a09c648b58 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 19 Sep 2023 13:38:55 +0100 Subject: [PATCH 14/20] Attempting to fix testing --- .github/workflows/ci.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3c88322..7dbaec1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,10 +40,6 @@ jobs: run: | curl https://dp24.cog.sanger.ac.uk/TreeValTinyData.tar.gz | tar xzf - - - name: Tree test data - run: | - tree TreeValTinyData - - name: Run MAPS_ONLY pipeline with test data # Remember that you can parallelise this by using strategy.matrix run: | From e2b677781636984cf57c6997b6891b33040c5e63 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 19 Sep 2023 14:09:13 +0100 Subject: [PATCH 15/20] Attempting to fix testing --- modules/local/extract_telo.nf | 2 +- modules/local/find_telomere_windows.nf | 2 +- modules/local/generate_genome_file.nf | 2 +- modules/local/get_largest_scaff.nf | 2 +- modules/local/getminmaxpunches.nf | 2 +- modules/local/reformat_intersect.nf | 2 +- modules/local/rename_ids.nf | 2 +- modules/local/replace_dots.nf | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/local/extract_telo.nf b/modules/local/extract_telo.nf index f78e0a3..cad234f 100755 --- a/modules/local/extract_telo.nf +++ b/modules/local/extract_telo.nf @@ -5,7 +5,7 @@ process EXTRACT_TELO { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'docker.io/ubuntu:20.04' }" input: tuple val( meta ), path( file ) diff --git a/modules/local/find_telomere_windows.nf b/modules/local/find_telomere_windows.nf index ac9584e..2fcd002 100755 --- a/modules/local/find_telomere_windows.nf +++ b/modules/local/find_telomere_windows.nf @@ -6,7 +6,7 @@ process FIND_TELOMERE_WINDOWS { container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/java-jdk:8.0.112--1' : - 'quay.io/biocontainers/java-jdk:8.0.112--1' }" + 'biocontainers/java-jdk:8.0.112--1' }" input: tuple val( meta ), path( file ) diff --git a/modules/local/generate_genome_file.nf b/modules/local/generate_genome_file.nf index 75c96d8..233e9ff 100755 --- a/modules/local/generate_genome_file.nf +++ b/modules/local/generate_genome_file.nf @@ -5,7 +5,7 @@ process GENERATE_GENOME_FILE { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'docker.io/ubuntu:20.04' }" input: tuple val( meta ), path( fai ) diff --git a/modules/local/get_largest_scaff.nf b/modules/local/get_largest_scaff.nf index 1819f4d..6348eda 100755 --- a/modules/local/get_largest_scaff.nf +++ b/modules/local/get_largest_scaff.nf @@ -6,7 +6,7 @@ process GET_LARGEST_SCAFF { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'docker.io/ubuntu:20.04' }" input: tuple val( meta ), path( file ) diff --git a/modules/local/getminmaxpunches.nf b/modules/local/getminmaxpunches.nf index 8dbe13d..7a42e3d 100755 --- a/modules/local/getminmaxpunches.nf +++ b/modules/local/getminmaxpunches.nf @@ -5,7 +5,7 @@ process GETMINMAXPUNCHES{ conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'docker.io/ubuntu:20.04' }" input: tuple val(meta), path(bedfile) diff --git a/modules/local/reformat_intersect.nf b/modules/local/reformat_intersect.nf index 0842fae..1d1930c 100755 --- a/modules/local/reformat_intersect.nf +++ b/modules/local/reformat_intersect.nf @@ -5,7 +5,7 @@ process REFORMAT_INTERSECT { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'docker.io/ubuntu:20.04' }" input: tuple val( meta ), path( file ) diff --git a/modules/local/rename_ids.nf b/modules/local/rename_ids.nf index 545d0c4..f69f518 100755 --- a/modules/local/rename_ids.nf +++ b/modules/local/rename_ids.nf @@ -5,7 +5,7 @@ process RENAME_IDS { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'docker.io/ubuntu:20.04' }" input: tuple val( meta ), path( file ) diff --git a/modules/local/replace_dots.nf b/modules/local/replace_dots.nf index 0f266ca..4d12f5c 100755 --- a/modules/local/replace_dots.nf +++ b/modules/local/replace_dots.nf @@ -5,7 +5,7 @@ process REPLACE_DOTS { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'docker.io/ubuntu:20.04' }" input: tuple val( meta ), path( file ) From 7dab39b5fbed59ec570fd9c9b647f201a3296209 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 19 Sep 2023 14:36:34 +0100 Subject: [PATCH 16/20] Attempting to fix testing --- modules/local/gap_length.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/gap_length.nf b/modules/local/gap_length.nf index 3273720..54f47df 100755 --- a/modules/local/gap_length.nf +++ b/modules/local/gap_length.nf @@ -5,7 +5,7 @@ process GAP_LENGTH { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'docker.io/ubuntu:20.04' }" input: tuple val( meta ), path( file ) From e069be5d74244ff02aedb364dfbd99a3bed6f9ca Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 19 Sep 2023 15:14:17 +0100 Subject: [PATCH 17/20] Adding files for tower use --- .github/workflows/sanger_test.yml | 29 +++++++++++++++++ .github/workflows/sanger_test_full.yml | 43 ++++++++++++++++++++++++++ nextflow.config | 1 + 3 files changed, 73 insertions(+) create mode 100644 .github/workflows/sanger_test.yml create mode 100644 .github/workflows/sanger_test_full.yml diff --git a/.github/workflows/sanger_test.yml b/.github/workflows/sanger_test.yml new file mode 100644 index 0000000..e69af1e --- /dev/null +++ b/.github/workflows/sanger_test.yml @@ -0,0 +1,29 @@ +name: sanger-tol LSF tests + +on: + workflow_dispatch: +jobs: + run-tower: + name: Run LSF tests + runs-on: ubuntu-latest + steps: + - name: Launch workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} + workdir: ${{ secrets.TOWER_WORKDIR_PARENT }}/work/${{ github.repository }}/work-${{ github.sha }} + parameters: | + { + "outdir": "${{ secrets.TOWER_WORKDIR_PARENT }}/results/${{ github.repository }}/results-${{ github.sha }}", + } + profiles: test,sanger,singularity,cleanup + + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/sanger_test_full.yml b/.github/workflows/sanger_test_full.yml new file mode 100644 index 0000000..e028c6b --- /dev/null +++ b/.github/workflows/sanger_test_full.yml @@ -0,0 +1,43 @@ +name: sanger-tol LSF full size tests + +on: + push: + branches: + - main + - dev + workflow_dispatch: +jobs: + run-tower: + name: Run LSF full size tests + 
runs-on: ubuntu-latest + steps: + - name: Sets env vars for push + run: | + echo "REVISION=${{ github.sha }}" >> $GITHUB_ENV + if: github.event_name == 'push' + + - name: Sets env vars for workflow_dispatch + run: | + echo "REVISION=${{ github.sha }}" >> $GITHUB_ENV + if: github.event_name == 'workflow_dispatch' + + - name: Launch workflow via tower + uses: seqeralabs/action-tower-launch@v2 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ env.REVISION }} + workdir: ${{ secrets.TOWER_WORKDIR_PARENT }}/work/${{ github.repository }}/work-${{ env.REVISION }} + parameters: | + { + "outdir": "${{ secrets.TOWER_WORKDIR_PARENT }}/results/${{ github.repository }}/results-${{ env.REVISION }}", + } + profiles: test_full,sanger,singularity,cleanup + + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: | + tower_action_*.log + tower_action_*.json diff --git a/nextflow.config b/nextflow.config index a6f65ea..a1ec9d8 100755 --- a/nextflow.config +++ b/nextflow.config @@ -68,6 +68,7 @@ try { profiles { + cleanup { cleanup = true } debug { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' From 8e3923ce443bb0a1e64e287b5bcdd96df9e95d3b Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 19 Sep 2023 16:44:18 +0100 Subject: [PATCH 18/20] Added sanger testing CI --- .github/workflows/ci.yml | 2 +- README.md | 1 - conf/test_full.config | 8 ++++++-- main.nf | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7dbaec1..e8ec2ab 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -48,4 +48,4 @@ jobs: - name: Run ALL_FILES pipeline with test data # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results -entry ALL_FILES + nextflow run ${GITHUB_WORKSPACE} 
-profile test,docker --outdir ./results diff --git a/README.md b/README.md index cd0ef15..03bd2bb 100755 --- a/README.md +++ b/README.md @@ -65,7 +65,6 @@ nextflow run sanger-tol/curationpretext \ --pacbio path/to/pacbio/fasta/ \ --teloseq TTAGGG \ --sample { default is "pretext_rerun" } - -entry ALL_FILES \ --outdir path/to/outdir/ // For MAPS_ONLY run diff --git a/conf/test_full.config b/conf/test_full.config index ce8664d..d88e979 100755 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -21,6 +21,10 @@ params { // TODO nf-core: Give any required params for the test so that command line flags are not needed input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' - // Genome references - genome = 'R64-1-1' + input = "/home/runner/work/curationpretext/curationpretext/TreeValTinyData/assembly/draft/grTriPseu1.fa" + outdir = "./results" + pacbio = "/home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/pacbio/" + cram = "/home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/hic-arima/" + sample = "CurationPretextTest" + teloseq = "TTAGGG" } diff --git a/main.nf b/main.nf index 836492b..14374d6 100755 --- a/main.nf +++ b/main.nf @@ -57,7 +57,7 @@ workflow SANGERTOL_CURATIONPRETEXT_MAPS { // WORKFLOW: Execute a single named workflow for the pipeline // See: https://github.com/nf-core/rnaseq/issues/619 // -workflow ALL_FILES { +workflow { SANGERTOL_CURATIONPRETEXT_ALL_FILES () } From 791443bbcb02fea5ae86b5ff84de66bc6c3a3589 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 20 Sep 2023 15:37:12 +0100 Subject: [PATCH 19/20] Update for testing --- .github/workflows/ci.yml | 2 +- conf/test_full.config | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e8ec2ab..565fd6c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,7 +38,7 @@ jobs: - 
name: Download test data # Download A fungal test data set that is full enough to show some real output. run: | - curl https://dp24.cog.sanger.ac.uk/TreeValTinyData.tar.gz | tar xzf - + curl https://tolit.cog.sanger.ac.uk/test-data/TreeValTinyData.tar.gz | tar xzf - - name: Run MAPS_ONLY pipeline with test data # Remember that you can parallelise this by using strategy.matrix diff --git a/conf/test_full.config b/conf/test_full.config index d88e979..71fc824 100755 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -21,10 +21,10 @@ params { // TODO nf-core: Give any required params for the test so that command line flags are not needed input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' - input = "/home/runner/work/curationpretext/curationpretext/TreeValTinyData/assembly/draft/grTriPseu1.fa" + input = "/lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/assembly/draft/DF5033.hifiasm.noTelos.20211120/DF5033.noTelos.hifiasm.purged.noCont.noMito.fasta" outdir = "./results" - pacbio = "/home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/pacbio/" - cram = "/home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/hic-arima/" + pacbio = "/lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/pacbio/fasta/" + cram = "/lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/hic-arima2/full/" sample = "CurationPretextTest" teloseq = "TTAGGG" } From 6d7f3971c2aff1e1d11f3bce6a53ac7092f0c1e0 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 20 Sep 2023 15:38:49 +0100 Subject: [PATCH 20/20] Update for testing --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 565fd6c..0300dca 100644 --- 
a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,7 +38,7 @@ jobs: - name: Download test data # Download A fungal test data set that is full enough to show some real output. run: | - curl https://tolit.cog.sanger.ac.uk/test-data/TreeValTinyData.tar.gz | tar xzf - + curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf - - name: Run MAPS_ONLY pipeline with test data # Remember that you can parallelise this by using strategy.matrix