Skip to content

Commit

Permalink
Merge pull request #132 from sanger-tol/pre-tag
Browse files Browse the repository at this point in the history
Merging Pre tag into main
  • Loading branch information
DLBPointon authored Sep 19, 2023
2 parents a2b1d55 + 6328583 commit d68fea5
Show file tree
Hide file tree
Showing 66 changed files with 1,270 additions and 262 deletions.
16 changes: 12 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,17 @@ jobs:
with:
version: "${{ matrix.NXF_VER }}"

- name: Run pipeline with test data
# TODO nf-core: You can customise CI pipeline run tests as required
# For example: adding multiple test runs with different parameters
- name: Download test data
# Download a fungal test data set that is full enough to show some real output.
run: |
curl https://dp24.cog.sanger.ac.uk/TreeValTinyData.tar.gz | tar xzf -
- name: Run RAPID pipeline with test data
# Remember that you can parallelise this by using strategy.matrix
run: |
nextflow run ${GITHUB_WORKSPACE} -entry RAPID -profile github_test,docker --outdir ./results-rapid
- name: Run FULL pipeline with test data
# Remember that you can parallelise this by using strategy.matrix
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
nextflow run ${GITHUB_WORKSPACE} -entry FULL -profile github_test,docker --outdir ./results-full
144 changes: 139 additions & 5 deletions .gitpod.yml
Original file line number Diff line number Diff line change
@@ -1,14 +1,148 @@
image: nfcore/gitpod:latest
image: gitpod/workspace-full-vnc
# Update 7th September to reflect code base changes
ports:
- name: JBrowseWeb
description: The JBrowse Webserver port
port: 3000
onOpen: open-browser
visibility: public

- name: HiGlass
description: The HiGlass port
port: 8989
onOpen: open-browser
visibility: public

tasks:
- name: Install Singularity 3.11.4
# https://docs.sylabs.io/guides/3.0/user-guide/installation.html
init: |
cd /workspace/treeval-curation/
sudo apt-get update && sudo apt-get install -y \
build-essential \
libssl-dev \
uuid-dev \
libgpgme11-dev \
squashfs-tools \
libseccomp-dev \
pkg-config
mkdir -p $GOPATH/src/github.com/sylabs && \
cd $GOPATH/src/github.com/sylabs && \
wget https://github.com/sylabs/singularity/releases/download/v3.11.4/singularity-ce-3.11.4.tar.gz && \
tar -xzf singularity-ce-3.11.4.tar.gz && \
cd ./singularity-ce-3.11.4 && \
./mconfig
./mconfig && \
make -C ./builddir && \
sudo make -C ./builddir install
- name: Install Nextflow
# https://www.nextflow.io/docs/latest/getstarted.html
init: |
cd /workspace/treeval-curation/
wget -qO- https://get.nextflow.io | bash
chmod +x nextflow
nextflow self-update
- name: Install JBrowse2
# https://jbrowse.org/jb2/download/#jbrowse-cli-tools
command: |
cd /workspace/treeval-curation/
npm install -g @jbrowse/cli
jbrowse create jbrowse2
cd jbrowse2/
npx serve . -l 3000
- name: Install TreeVal Pipeline
# https://github.com/sanger-tol/treeval
init: |
cd /workspace/treeval-curation/
git clone -b pre-tag https://github.com/sanger-tol/treeval.git
- name: Install Curation Pretext
# https://github.com/sanger-tol/curationpretext
init: |
cd /workspace/treeval-curation/
git clone -b dev https://github.com/sanger-tol/curationpretext.git
- name: Install HiGlass
# https://docs.higlass.io/tutorial.html
init: |
cd /workspace/treeval-curation/
pip install higlass-manage
higlass-manage start
- name: Alias Nextflow
init: |
cd /workspace/treeval-curation/
echo "alias nextflow_cmd='/workspace/treeval-curation/nextflow'" >> ~/.bashrc
source ~/.bashrc
- name: Download busco for nematode
init: |
cd /workspace/treeval-curation/
curl https://dp24.cog.sanger.ac.uk/Busco.tar.gz | tar xzf -
- name: Download Nematode Test data and make synteny
init: |
cd /workspace/treeval-curation/
curl https://dp24.cog.sanger.ac.uk/Nematode.tar.gz | tar xzf -
mkdir -p /workspace/treeval-curation/synteny/nematode/
cp /workspace/treeval-curation/Oscheius_DF5033/genomic_data/Oscheius_DF5033.fa /workspace/treeval-curation/synteny/nematode/SuperNematode.fa
- name: Download Lepidoptera data
init: |
cd /workspace/treeval-curation/
curl https://dp24.cog.sanger.ac.uk/ilTorViri5.tar.gz | tar xzf -
- name: Download Genomic Alignment data
init: |
cd /workspace/treeval-curation/
curl https://dp24.cog.sanger.ac.uk/AlignmentData.tar.gz | tar xzf -
- name: Open Tutorial Page
init: |
gp preview https://bga23.org/treeval-curation/Tutorial/
github:
prebuilds:
# enable for the master/default branch (defaults to true)
master: true
# add a "Review in Gitpod" button as a comment to pull requests (defaults to true)
addComment: true
# add a "Review in Gitpod" button to pull requests (defaults to false)
addBadge: true
# add a label once the prebuild is ready to pull requests (defaults to false)
addLabel: prebuilt-in-gitpod

vscode:
extensions: # based on nf-core.nf-core-extensionpack
- codezombiech.gitignore # Language support for .gitignore files
# - cssho.vscode-svgviewer # SVG viewer
- esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code
- eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed
- EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files
- Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar
- mechatroner.rainbow-csv # Highlight columns in csv files in different colors
# - nextflow.nextflow # Nextflow syntax highlighting
- nextflow.nextflow # Nextflow syntax highlighting
- oderwat.indent-rainbow # Highlight indentation level
- streetsidesoftware.code-spell-checker # Spelling checker for source code
1 change: 1 addition & 0 deletions .nf-core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ repository_type: pipeline
lint:
files_exist:
- assets/nf-core-treeval_logo_light.png
- conf/test_full.config
- docs/images/nf-core-treeval_logo_light.png
- docs/images/nf-core-treeval_logo_dark.png
files_unchanged:
Expand Down
72 changes: 54 additions & 18 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ The essential pathways of the gEVAL pipeline have now been converted to Nextflow
- Subworkflow to generate syntenic alignments to high quality genomes.
- Subworkflow to generate tracks containing telomeric sites.
- Custom Groovy for reporting to provide file metrics and resource usage.
- Citations and all docs (including walkthroughs).
- Added .gitpod.yml for running in the cloud. This is the tutorial written for BGA23.

### Parameters

Expand All @@ -36,24 +38,58 @@ The essential pathways of the gEVAL pipeline have now been converted to Nextflow

Note, since the pipeline is using Nextflow DSL2, each process will be run with its own Biocontainer. This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference.

| Module | Old Version | New Versions |
| ------------------------------ | ----------- | ---------------- |
| bedtools | - | 2.31.0 |
| busco | - | 5.4.3 |
| bwa-mem2 | - | 2.2.1 |
| cat | - | 2.3.4 |
| cooler | - | 0.9.2 |
| gnu-sort | - | 8.25 |
| minimap2 + samtools | - | 2.24 + 1.14 |
| miniprot | - | 0.11--he4a0461_2 |
| mummer | - | 3.23 |
| paftools (minimap2 + samtools) | - | 2.24 + 1.14 |
| pretextmap + samtools | - | 0.1.9 + 1.17 |
| samtools | - | 1.17 |
| seqtk | - | 1.4 |
| tabix | - | 1.11 |
| ucsc | - | 377 |
| windowmasker (blast) | - | 2.14.0 |
| Module | Old Version | New Versions |
| -------------------------------------- | ----------- | ---------------- |
| assign_ancestral ( pandas + Python ) | - | 1.5.2 + 3.9 |
| bamtobed_sort ( bedtools + samtools ) | - | 2.31.0 + 1.17 |
| bedtools | - | 2.31.0 |
| busco | - | 5.4.3 |
| bwa-mem2 | - | 2.2.1 |
| cat | - | 2.3.4 |
| chunk_fasta ( pyfasta ) | - | 0.5.2-1 |
| cooler | - | 0.9.2 |
| concat_block ( coreutils ) | - | 9.1 |
| concat_mummer ( coreutils ) | - | 9.1 |
| cram_filter_align_bwamem2_fixmate_sort | - | |
| ^ ( samtools + bwamem2 ) ^ | - | 1.16.1 + 2.2.1 |
| extract_ancestral ( python ) | - | 3.9 |
| extract_buscogene ( coreutils ) | - | 9.1 |
| extract_cov_id ( coreutils ) | - | 9.1 |
| extract_repeat ( perl ) | - | 5.26.2 |
| extract_telo ( coreutils ) | - | 9.1 |
| find_telomere_regions ( gcc ) | - | 7.1.0 |
| find_telomere_windows ( java-jdk ) | - | 8.0.112 |
| findhalfcoverage ( python ) | - | 3.9 |
| gap_length ( coreutils ) | - | 9.1 |
| generate_cram_csv ( samtools ) | - | 1.17 |
| get_largest_scaff ( coreutils ) | - | 9.1 |
| get_paired_contact_bed ( coreutils ) | - | 9.1 |
| get_synteny_genomes ( coreutils ) | - | 9.1 |
| getminmaxpunches ( coreutils ) | - | 9.1 |
| graphoverallcoverage ( perl ) | - | 5.26.2 |
| gnu-sort | - | 8.25 |
| juicer_tools_pre ( java-jdk ) | - | 8.0.112 |
| makecmap_cmap2bed ( python ) | - | 3.9 |
| makecmap_fa2cmapmulticolor ( perl ) | - | 5.26.2 |
| makecmap_renamecmapids ( perl ) | - | 5.26.2 |
| minimap2 + samtools | - | 2.24 + 1.14 |
| miniprot | - | 0.11--he4a0461_2 |
| mummer | - | 3.23 |
| paf_to_bed ( coreutils ) | - | 9.1 |
| paftools ( minimap2 + samtools ) | - | 2.24 + 1.14 |
| pretextmap + samtools | - | 0.1.9 + 1.17 |
| reformat_intersect ( coreutils ) | - | 9.1 |
| reformat_ids ( coreutils ) | - | 9.1 |
| replace_dots ( coreutils ) | - | 9.1 |
| samtools | - | 1.17 |
| selfcomp_alignmentblocks ( python ) | - | 3.9 |
| selfcomp_mapids ( python ) | - | 3.9 |
| selfcomp_mummer2bed ( python ) | - | 3.9 |
| selfcomp_splitfasta ( perl-bioperl ) | - | 1.7.8-1 |
| seqtk | - | 1.4 |
| tabix | - | 1.11 |
| ucsc | - | 377 |
| windowmasker (blast) | - | 2.14.0 |

### Fixed

Expand Down
3 changes: 2 additions & 1 deletion CITATIONS.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

> Abdennur, N. and Mirny, L.A. 2019. ‘Cooler: Scalable storage for hi-C data and other genomically labeled arrays’, Bioinformatics, 36(1), pp. 311–316. doi:10.1093/bioinformatics/btz540.
- [Find Telomere]()
- [Find Telomere](https://github.com/VGP/vgp-assembly/tree/master/pipeline/telomere)

> VGP. 2022. vgp-assembly telomere [online]. https://github.com/VGP/vgp-assembly/tree/master/pipeline/telomere. (Accessed on 28th February 2023).
Expand Down Expand Up @@ -95,6 +95,7 @@
> Morgulis, A., et al. 2006. WindowMasker: window-based masker for sequenced genomes. Bioinformatics. 22(2). pp.134–141. doi: 10.1093/bioinformatics/bti774.
- [lep_busco_painter](https://www.biorxiv.org/content/10.1101/2023.05.12.540473v1.full.pdf)

> Wright, C. et al. 2023. Chromosome evolution in Lepidoptera. bioRxiv. 540473. https://doi.org/10.1101/2023.05.12.540473
## Software packaging/containerisation tools
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ nextflow run main.nf -profile singularity --input treeval.yaml -entry {FULL|RAPI

An example treeval.yaml can be found [here](assets/local_testing/nxOscDF5033.yaml).

Further documentation about the pipeline can be found in the following files: [usage](https://nf-co.re/treeval/usage), [parameters](https://nf-co.re/treeval/parameters) and [output](https://nf-co.re/treeval/output).
Further documentation about the pipeline can be found in the following files: [usage](https://pipelines.tol.sanger.ac.uk/treeval/dev/usage), [parameters](https://pipelines.tol.sanger.ac.uk/treeval/dev/parameters) and [output](https://pipelines.tol.sanger.ac.uk/treeval/dev/output).

> **Warning:**
> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those
Expand Down
25 changes: 0 additions & 25 deletions assets/full_s3_treeval_test.yaml

This file was deleted.

31 changes: 31 additions & 0 deletions assets/github_testing/TreeValTinyTest-Local.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
assembly:
level: scaffold
sample_id: grTriPseu1
latin_name: to_provide_taxonomic_rank
classT: fungi
asmVersion: 1
dbVersion: "1"
gevalType: DTOL
reference_file: /nfs/treeoflife-01/teams/tola/users/dp24/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa
assem_reads:
pacbio: /nfs/treeoflife-01/teams/tola/users/dp24/treeval/TreeValTinyData/genomic_data/pacbio/
hic: /nfs/treeoflife-01/teams/tola/users/dp24/treeval/TreeValTinyData/genomic_data/hic-arima/
supplementary: path
alignment:
data_dir: /nfs/treeoflife-01/teams/tola/users/dp24/treeval/TreeValTinyData/gene_alignment_data/
common_name: "" # For future implementation (adding bee, wasp, ant etc)
geneset: "LaetiporusSulphureus.gfLaeSulp1"
# Path should end up looking like "{data_dir}{classT}/{common_name}/csv_data/{geneset}-data.csv"
self_comp:
motif_len: 0
mummer_chunk: 10
synteny:
synteny_genome_path: /nfs/treeoflife-01/teams/tola/users/dp24/treeval/TreeValTinyData/synteny/
outdir: "NEEDS TESTING"
intron:
size: "50k"
telomere:
teloseq: TTAGGG
busco:
lineages_path: /nfs/treeoflife-01/teams/tola/users/dp24/treeval/TreeValTinyData/busco/subset/
lineage: fungi_odb10
31 changes: 31 additions & 0 deletions assets/github_testing/TreeValTinyTest.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
assembly:
level: scaffold
sample_id: grTriPseu1
latin_name: to_provide_taxonomic_rank
classT: fungi
asmVersion: 1
dbVersion: "1"
gevalType: DTOL
reference_file: /home/runner/work/treeval/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa
assem_reads:
pacbio: /home/runner/work/treeval/treeval/TreeValTinyData/genomic_data/pacbio/
hic: /home/runner/work/treeval/treeval/TreeValTinyData/genomic_data/hic-arima/
supplementary: path
alignment:
data_dir: /home/runner/work/treeval/treeval/TreeValTinyData/gene_alignment_data/
common_name: "" # For future implementation (adding bee, wasp, ant etc)
geneset: "LaetiporusSulphureus.gfLaeSulp1"
# Path should end up looking like "{data_dir}{classT}/{common_name}/csv_data/{geneset}-data.csv"
self_comp:
motif_len: 0
mummer_chunk: 10
synteny:
synteny_genome_path: /home/runner/work/treeval/treeval/TreeValTinyData/synteny/
outdir: "NEEDS TESTING"
intron:
size: "50k"
telomere:
teloseq: TTAGGG
busco:
lineages_path: /home/runner/work/treeval/treeval/TreeValTinyData/busco/subset/
lineage: fungi_odb10
Loading

0 comments on commit d68fea5

Please sign in to comment.