Rgp clustering #567

Workflow file for this run

	name: CI

	on:
	push:
	branches:
	- '*'
	pull_request:
	branches:
	- '*'

	# A workflow run is made up of one or more jobs that can run sequentially or in parallel
	jobs:
	test:
	name: test PPanGGOLiN on ${{ matrix.os }} with python ${{ matrix.python-version }}
	# The type of runner that the job will run on
	runs-on: ${{ matrix.os }}
	strategy:
	matrix:
	os: ['ubuntu-latest', 'macos-latest']
	python-version: ['3.8', '3.9', '3.10']
	steps:
	# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
	- uses: actions/checkout@v2
	# Setting up miniconda
	- uses: conda-incubator/setup-miniconda@v2
	with:
	condarc-file: .condarc.yml
	activate-environment: test
	python-version: ${{ matrix.python-version }}
	# Install the dependencies
	- name: Set up test environment
	shell: bash -l {0}
	run: \|
	conda install -y --file requirements.txt
	conda install -y pytest
	pip install .
	# Check that it is installed and displays help without error
	- name: Check that PPanGGOLiN is installed
	shell: bash -l {0}
	run: \|
	ppanggolin --version
	ppanggolin --help
	# Check that unit tests are all passing
	- name: Unit tests
	shell: bash -l {0}
	run: pytest
	# Test the complete workflow
	- name: Complete workflow
	shell: bash -l {0}
	run: \|
	cd testingDataset
	ppanggolin all --cpu 1 --fasta organisms.fasta.list --output mybasicpangenome
	ppanggolin info --pangenome mybasicpangenome/pangenome.h5 --content --parameters --status
	cd -
	# test most options calls. If there is a change in the API somewhere that was not taken into account (whether in the options for the users, or the classes for the devs), this should fail, otherwise everything is probably good.
	#--draw_hotspots option is problematic on macOS.
	- name: Step by Step workflow with most options calls
	shell: bash -l {0}
	run: \|
	cd testingDataset
	ppanggolin annotate --fasta organisms.fasta.list --output stepbystep --kingdom bacteria --contig_filter 500
	ppanggolin cluster -p stepbystep/pangenome.h5 --defrag --coverage 0.8 --identity 0.8
	ppanggolin graph -p stepbystep/pangenome.h5 -r 10
	ppanggolin partition --output stepbystep -f -p stepbystep/pangenome.h5 --cpu 1 -b 2.6 -ms 10 -fd -ck 500 -Kmm 3 12 -im 0.04 --draw_ICL -se $RANDOM
	ppanggolin rarefaction --output stepbystep -f -p stepbystep/pangenome.h5 --depth 5 --min 1 --max 50 -ms 10 -fd -ck 30 -K 3 --soft_core 0.9 -se $RANDOM
	ppanggolin draw -p stepbystep/pangenome.h5 --tile_plot --nocloud --soft_core 0.92 --ucurve --output stepbystep -f
	ppanggolin rgp -p stepbystep/pangenome.h5 --persistent_penalty 2 --variable_gain 1 --min_score 3 --dup_margin 0.05
	ppanggolin spot -p stepbystep/pangenome.h5 --spot_graph --overlapping_match 2 --set_size 3 --exact_match_size 1
	ppanggolin draw -p stepbystep/pangenome.h5 --draw_spots -o stepbystep -f
	ppanggolin module -p stepbystep/pangenome.h5 --transitive 4 --size 3 --jaccard 0.86 --dup_margin 0.05
	ppanggolin write -p stepbystep/pangenome.h5 --output stepbystep -f --soft_core 0.9 --dup_margin 0.06 --gexf --light_gexf --csv --Rtab --projection --stats --partitions --compress --json --regions --spots --borders --families_tsv --cpu 1
	ppanggolin fasta -p stepbystep/pangenome.h5 --output stepbystep -f --prot_families all --gene_families shell --regions all --fasta organisms.fasta.list
	ppanggolin draw -p stepbystep/pangenome.h5 --draw_spots -o stepbystep -f
	ppanggolin metrics -p stepbystep/pangenome.h5 --genome_fluidity --info_modules --no_print_info -f --log metrics.log
	cd -
	- name: gbff parsing and MSA computing
	shell: bash -l {0}
	run: \|
	cd testingDataset
	ppanggolin workflow --cpu 1 --anno organisms.gbff.list --output myannopang
	ppanggolin msa --pangenome myannopang/pangenome.h5 --source dna --partition core -o myannopang/ -f --use_gene_id --phylo --single_copy
	cd -
	- name: clusters reading from external file
	shell: bash -l {0}
	run: \|
	cd testingDataset
	ppanggolin panrgp --anno organisms.gbff.list --cluster clusters.tsv --output readclusterpang
	ppanggolin annotate --anno organisms.gbff.list --output readclusters
	ppanggolin cluster --cluster clusters.tsv -p readclusters/pangenome.h5
	ppanggolin msa --pangenome readclusterpang/pangenome.h5 --partition persistent --phylo -o readclusterpang/msa/ -f
	cd -
	- name: testing rgp_cluster command
	shell: bash -l {0}
	run: \|
	cd testingDataset
	ppanggolin rgp_cluster --pangenome mybasicpangenome/pangenome.h5
	ppanggolin rgp_cluster --pangenome mybasicpangenome/pangenome.h5 --ignore_incomplete_rgp --grr_metric max_grr -f --graph_formats graphml gexf
	ppanggolin rgp_cluster --pangenome mybasicpangenome/pangenome.h5 --no_identical_rgp_merging -o rgp_clustering_no_identical_rgp_merging --graph_formats graphml
	cd -
	- name: testing align command
	shell: bash -l {0}
	run: \|
	cd testingDataset
	ppanggolin align --pangenome mybasicpangenome/pangenome.h5 --sequences some_chlam_proteins.fasta --output test_align --draw_related --getinfo
	cd -
	- name: testing context command
	shell: bash -l {0}
	run: \|
	cd testingDataset
	ppanggolin context --pangenome myannopang/pangenome.h5 --sequences some_chlam_proteins.fasta --output test_context
	ppanggolin context --pangenome readclusterpang/pangenome.h5 --family some_chlam_families.txt --output test_context -f
	cd -
	- name: testing metadata command
	shell: bash -l {0}
	run: \|
	cd testingDataset
	ppanggolin metadata -p mybasicpangenome/pangenome.h5 -s test -m metadata/metadata_genes.tsv -a genes
	ppanggolin metadata -p mybasicpangenome/pangenome.h5 -s test -m metadata/metadata_genomes.tsv -a genomes
	ppanggolin metadata -p mybasicpangenome/pangenome.h5 -s test -m metadata/metadata_families.tsv -a families --omit
	ppanggolin write -p mybasicpangenome/pangenome.h5 --output mybasicpangenome -f --gexf --light_gexf --cpu 1
	cd -
	- name: testing config file
	shell: bash -l {0}
	run: \|
	cd testingDataset
	ppanggolin utils --default_config panrgp -o panrgp_default_config.yaml
	ppanggolin panrgp --anno organisms.gbff.list --cluster clusters.tsv -o test_config --config panrgp_default_config.yaml

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Rgp clustering #567

Workflow file

Rgp clustering #567

Jobs

Run details

Workflow file for this run