# Add projection command #542
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Continuous integration for PPanGGOLiN: installs the package in a conda
# environment on every OS / Python combination of the matrix, runs the unit
# tests, then exercises the command line end to end on testingDataset.
name: CI

on:
  push:
    branches:
      # '**' rather than '*': in GitHub filter patterns '*' does not match the
      # '/' character, so branches such as 'feature/foo' would be skipped.
      - '**'
  pull_request:
    branches:
      - '**'

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  test:
    name: test PPanGGOLiN on ${{ matrix.os }} with python ${{ matrix.python-version }}
    # The type of runner that the job will run on
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: ['ubuntu-latest', 'macos-latest']
        # Versions are quoted on purpose: an unquoted 3.10 would be read as the float 3.1
        python-version: ['3.7', '3.8', '3.9', '3.10']
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v2

      # Setting up miniconda
      - uses: conda-incubator/setup-miniconda@v2
        with:
          condarc-file: .condarc.yml
          activate-environment: test
          python-version: ${{ matrix.python-version }}

      # Install the dependencies.
      # Every step below uses a login shell (bash -l) so that the conda
      # environment activated by setup-miniconda is available.
      - name: Set up test environment
        shell: bash -l {0}
        run: |
          conda install -y --file requirements.txt
          conda install -y pytest
          pip install .

      # Check that it is installed and displays help without error
      - name: Check that PPanGGOLiN is installed
        shell: bash -l {0}
        run: |
          ppanggolin --version
          ppanggolin --help

      # Check that unit tests are all passing
      - name: Unit tests
        shell: bash -l {0}
        run: pytest

      # Test the complete workflow.
      # The mybasicpangenome output is reused by the align and metadata steps below.
      - name: Complete workflow
        shell: bash -l {0}
        run: |
          cd testingDataset
          ppanggolin all --cpu 1 --fasta organisms.fasta.list --output mybasicpangenome
          ppanggolin info --pangenome mybasicpangenome/pangenome.h5 --content --parameters --status
          cd -

      # Test most options calls. If there is a change in the API somewhere that
      # was not taken into account (whether in the options for the users, or the
      # classes for the devs), this should fail, otherwise everything is
      # probably good.
      # --draw_hotspots option is problematic on macOS.
      - name: Step by Step workflow with most options calls
        shell: bash -l {0}
        run: |
          cd testingDataset
          ppanggolin annotate --fasta organisms.fasta.list --output stepbystep --kingdom bacteria
          ppanggolin cluster -p stepbystep/pangenome.h5 --coverage 0.8 --identity 0.8
          ppanggolin graph -p stepbystep/pangenome.h5 -r 10
          ppanggolin partition --output stepbystep -f -p stepbystep/pangenome.h5 --cpu 1 -b 2.6 -ms 10 -fd -ck 500 -Kmm 3 12 -im 0.04 --draw_ICL -se $RANDOM
          ppanggolin rarefaction --output stepbystep -f -p stepbystep/pangenome.h5 --depth 5 --min 1 --max 50 -ms 10 -fd -ck 30 -K 3 --soft_core 0.9 -se $RANDOM
          ppanggolin draw -p stepbystep/pangenome.h5 --tile_plot --nocloud --soft_core 0.92 --ucurve --output stepbystep -f
          ppanggolin rgp -p stepbystep/pangenome.h5 --persistent_penalty 2 --variable_gain 1 --min_score 3 --dup_margin 0.05
          ppanggolin spot -p stepbystep/pangenome.h5 --spot_graph --overlapping_match 2 --set_size 3 --exact_match_size 1
          ppanggolin draw -p stepbystep/pangenome.h5 --draw_spots -o stepbystep -f
          ppanggolin module -p stepbystep/pangenome.h5 --transitive 4 --size 3 --jaccard 0.86 --dup_margin 0.05
          ppanggolin write -p stepbystep/pangenome.h5 --output stepbystep -f --soft_core 0.9 --dup_margin 0.06 --gexf --light_gexf --csv --Rtab --projection --stats --partitions --compress --json --regions --spots --borders --families_tsv --cpu 1
          ppanggolin fasta -p stepbystep/pangenome.h5 --output stepbystep -f --prot_families all --gene_families shell --regions all --fasta organisms.fasta.list
          ppanggolin draw -p stepbystep/pangenome.h5 --draw_spots --spots all -o stepbystep -f
          ppanggolin metrics -p stepbystep/pangenome.h5 --genome_fluidity --info_modules --no_print_info -f --log metrics.log
          cd -

      # The myannopang output is reused by the context and projection steps below.
      - name: gbff parsing and MSA computing
        shell: bash -l {0}
        run: |
          cd testingDataset
          ppanggolin workflow --cpu 1 --anno organisms.gbff.list --output myannopang
          ppanggolin msa --pangenome myannopang/pangenome.h5 --source dna --partition core -o myannopang/ -f --use_gene_id --phylo --single_copy
          cd -

      # The readclusterpang output is reused by the context step below.
      - name: clusters reading from external file
        shell: bash -l {0}
        run: |
          cd testingDataset
          ppanggolin panrgp --anno organisms.gbff.list --cluster clusters.tsv --output readclusterpang
          ppanggolin annotate --anno organisms.gbff.list --output readclusters
          ppanggolin cluster --clusters clusters.tsv -p readclusters/pangenome.h5
          ppanggolin msa --pangenome readclusterpang/pangenome.h5 --partition persistent --phylo -o readclusterpang/msa/ -f
          cd -

      - name: testing align command
        shell: bash -l {0}
        run: |
          cd testingDataset
          ppanggolin align --pangenome mybasicpangenome/pangenome.h5 --sequences some_chlam_proteins.fasta --output test_align --draw_related --getinfo
          cd -

      - name: testing context command
        shell: bash -l {0}
        run: |
          cd testingDataset
          ppanggolin context --pangenome myannopang/pangenome.h5 --sequences some_chlam_proteins.fasta --output test_context
          ppanggolin context --pangenome readclusterpang/pangenome.h5 --family some_chlam_families.txt --output test_context -f
          cd -

      - name: testing metadata command
        shell: bash -l {0}
        run: |
          cd testingDataset
          ppanggolin metadata -p mybasicpangenome/pangenome.h5 -s test -m metadata/metadata_genes.tsv -a genes
          ppanggolin metadata -p mybasicpangenome/pangenome.h5 -s test -m metadata/metadata_genomes.tsv -a genomes
          ppanggolin metadata -p mybasicpangenome/pangenome.h5 -s test -m metadata/metadata_families.tsv -a families --omit
          ppanggolin write -p mybasicpangenome/pangenome.h5 --output mybasicpangenome -f --gexf --light_gexf --cpu 1
          cd -

      # Generate the default panrgp configuration, then run panrgp with it
      - name: testing config file
        shell: bash -l {0}
        run: |
          cd testingDataset
          ppanggolin utils --default_config panrgp -o panrgp_default_config.yaml
          ppanggolin panrgp --anno organisms.gbff.list --cluster clusters.tsv -o test_config --config panrgp_default_config.yaml
          cd -

      # Run projection on the myannopang pangenome, once from a GBFF
      # annotation file and once from a FASTA file
      - name: testing projection cmd
        shell: bash -l {0}
        run: |
          cd testingDataset
          ppanggolin projection --pangenome myannopang/pangenome.h5 -o projection_from_gbff --organism_name trotro --annot GBFF/GCF_000026905.1_ASM2690v1_genomic.gbff.gz --spot_graph
          ppanggolin projection --pangenome myannopang/pangenome.h5 -o projection_from_fasta --organism_name trotro --fasta FASTA/GCF_000026905.1_ASM2690v1_genomic.fna.gz --spot_graph --graph_formats graphml