Skip to content

Commit

Permalink
Add test to run part of pipeline without sex chromosomes
Browse files Browse the repository at this point in the history
  • Loading branch information
verku committed Oct 6, 2023
1 parent 06edaa4 commit 899949e
Show file tree
Hide file tree
Showing 6 changed files with 130 additions and 117 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,12 @@ jobs:
run: |
snakemake --configfile .test/config/config_pca_roh.yaml -j 4 --cores 1 --use-singularity
- name: snpeff_gerp_dry
- name: snpeff_gerp_autos_dry
shell: bash -l {0}
run: |
snakemake -npr --configfile .test/config/config_snpeff_gerp.yaml -j 4 --cores 1 --use-singularity
snakemake -npr --configfile .test/config/config_snpeff_gerp_autos.yaml -j 4 --cores 1 --use-singularity
- name: snpeff_gerp
- name: snpeff_gerp_autos
shell: bash -l {0}
run: |
snakemake --configfile .test/config/config_snpeff_gerp.yaml -j 4 --cores 1 --use-singularity
snakemake --configfile .test/config/config_snpeff_gerp_autos.yaml -j 4 --cores 1 --use-singularity
59 changes: 31 additions & 28 deletions .test/config/config_mitogenomes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,21 @@
# The file name will be reused by the pipeline and can have the file
# name extensions *.fasta, *.fa or *.fna.
ref_path: ".test/data/references/sumatran_rhino.fasta"

# OPTIONAL:
# Relative path (from the main pipeline directory) to file listing
# scaffolds/contigs linked to sex chromosomes (one scaffold/contig
# name per line).
# It is used to create BED files to run mlRho separately for autosomes
# and sex chromosomes or exclusively for autosomes, and/or to create
# autosome-only BCF files for PCA, ROH, snpEff and GERP analyses.
# It can also be used to specify any other contigs/scaffolds, e.g.
# unplaced or short scaffolds, for removal from the mlRho analysis
# and BCF files.
# Leave empty ("") if the identity of sex chromosomes is unknown and/or
# if the pipeline should be run on all scaffolds/contigs of the genome.
sexchromosomes: "" # for example, "config/chrX_candidate_scaffolds.txt"
#################################################################
#################################################################


Expand Down Expand Up @@ -278,26 +293,6 @@ CpG_samplenames: []
# Rules for BAM file processing for mlRho, and mlRho #
#################################################################

#####
# OPTIONAL:
# Generate BED files of autosomes and sex chromosomes for mlRho
# analyses, in case these should be analyzed separately from each
# other (see below for further options).
# Includes intersecting of the new chromosome-specific BED files
# with CpG- and repeat-masking BED files for downstream filtering.
autosome_sexchromosome_bed_files: False

# Relative path (from the main pipeline directory) to file listing
# scaffolds/contigs linked to sex chromosomes (one scaffold/contig
# name per line).
# Leave empty ("") if identity of sex chromosomes is unknown and/or
# if mlRho should be run on all scaffolds/contigs of the genome.
# Keep the path to the file when running the next step (mlRho)
# separately for autosomes and sex chromosomes or only for autosomes.
sexchromosomes: "" # for example, "config/chrX_candidate_scaffolds.txt"
#####


#####
# Run mlRho 2.9 on filtered BAM files.
# Automatically generates a PDF file with a plot of genome-wide
Expand All @@ -315,21 +310,22 @@ mlRho: False
# and/or mlRho should be run on all contigs/scaffolds,
# set mlRho_autosomes_sexchromosomes to False and do not provide
# a path to a text file with sex-chromosomal contigs/scaffolds
# above when running mlRho.
# with the reference genome ("sexchromosomes") when running mlRho.
#
# 2) If the identity of sex-chromosomal contigs/scaffolds is known,
# mlRho analyses can be run for autosomes and sex chromosomes
# separately from each other.
# In that case, set mlRho_autosomes_sexchromosomes to True and
# provide the path to the file with sex-chromosomal contigs/scaffolds
# above when running mlRho.
# with the reference genome ("sexchromosomes") when running mlRho.
#
# 3) If the identity of sex-chromosomal contigs/scaffolds is known,
# sex-chromosomal contigs/scaffolds can be entirely excluded from
# sex-chromosomal contigs/scaffolds (or other contigs/scaffolds such
# as unplaced or short scaffolds) can be entirely excluded from
# the analysis.
# In that case, set mlRho_autosomes_sexchromosomes to False and
# provide the path to the file with sex-chromosomal contigs/scaffolds
# above when running mlRho.
# with the reference genome ("sexchromosomes") when running mlRho.
mlRho_autosomes_sexchromosomes: False
#####
#################################################################
Expand Down Expand Up @@ -362,16 +358,23 @@ vcf_qual_repeat_filtering: False
#####
# Merge BCF files into a BCF file containing all samples and remove all
# sites that are not biallelic and with missing data across all samples
# up to a certain threshold as defined below.
# up to a certain threshold as defined below.
# If the path to a file with sex-chromosomal contigs/scaffolds is provided
# with the reference genome ("sexchromosomes"), these scaffolds/contigs are
# removed from the merged and filtered BCF file and all downstream analyses
# (optional).
# Extract 1) all historical and 2) all modern samples from the merged and
# filtered BCF file.
# Create a BED file of sites that remain after filtering across all samples
# to be used for downstream filtering of individual BCF files.
# Create a BED file of sites that remain after filtering and contig/scaffold
# removal across all samples to be used for downstream filtering of individual
# BCF files.
merge_vcfs_per_dataset: False

# Maximum allowed fraction of missing genotypes across all samples for a
# site to be kept in the BCF and BED file, to ensure that the same sites
# are compared between historical and modern samples.
# are compared between historical and modern samples. Has to be a floating
# point number between 0.0 (no missing data allowed) and 1.0 (sites with
# completely missing data are allowed).
f_missing: 0.1 # default: 0.1 (i.e. maximum 10% missing genotypes per site)
#####

Expand Down
59 changes: 31 additions & 28 deletions .test/config/config_mlRho_options.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,21 @@
# The file name will be reused by the pipeline and can have the file
# name extensions *.fasta, *.fa or *.fna.
ref_path: ".test/data/references/sumatran_rhino.fasta"

# OPTIONAL:
# Relative path (from the main pipeline directory) to file listing
# scaffolds/contigs linked to sex chromosomes (one scaffold/contig
# name per line).
# It is used to create BED files to run mlRho separately for autosomes
# and sex chromosomes or exclusively for autosomes, and/or to create
# autosome-only BCF files for PCA, ROH, snpEff and GERP analyses.
# It can also be used to specify any other contigs/scaffolds, e.g.
# unplaced or short scaffolds, for removal from the mlRho analysis
# and BCF files.
# Leave empty ("") if the identity of sex chromosomes is unknown and/or
# if the pipeline should be run on all scaffolds/contigs of the genome.
sexchromosomes: "" # for example, "config/chrX_candidate_scaffolds.txt"
#################################################################
#################################################################


Expand Down Expand Up @@ -278,26 +293,6 @@ CpG_samplenames: ["S03", "S08"]
# Rules for BAM file processing for mlRho, and mlRho #
#################################################################

#####
# OPTIONAL:
# Generate BED files of autosomes and sex chromosomes for mlRho
# analyses, in case these should be analyzed separately from each
# other (see below for further options).
# Includes intersecting of the new chromosome-specific BED files
# with CpG- and repeat-masking BED files for downstream filtering.
autosome_sexchromosome_bed_files: False

# Relative path (from the main pipeline directory) to file listing
# scaffolds/contigs linked to sex chromosomes (one scaffold/contig
# name per line).
# Leave empty ("") if identity of sex chromosomes is unknown and/or
# if mlRho should be run on all scaffolds/contigs of the genome.
# Keep the path to the file when running the next step (mlRho)
# separately for autosomes and sex chromosomes or only for autosomes.
sexchromosomes: "" # for example, "config/chrX_candidate_scaffolds.txt"
#####


#####
# Run mlRho 2.9 on filtered BAM files.
# Automatically generates a PDF file with a plot of genome-wide
Expand All @@ -315,21 +310,22 @@ mlRho: True
# and/or mlRho should be run on all contigs/scaffolds,
# set mlRho_autosomes_sexchromosomes to False and do not provide
# a path to a text file with sex-chromosomal contigs/scaffolds
# above when running mlRho.
# with the reference genome ("sexchromosomes") when running mlRho.
#
# 2) If the identity of sex-chromosomal contigs/scaffolds is known,
# mlRho analyses can be run for autosomes and sex chromosomes
# separately from each other.
# In that case, set mlRho_autosomes_sexchromosomes to True and
# provide the path to the file with sex-chromosomal contigs/scaffolds
# above when running mlRho.
# with the reference genome ("sexchromosomes") when running mlRho.
#
# 3) If the identity of sex-chromosomal contigs/scaffolds is known,
# sex-chromosomal contigs/scaffolds can be entirely excluded from
# sex-chromosomal contigs/scaffolds (or other contigs/scaffolds such
# as unplaced or short scaffolds) can be entirely excluded from
# the analysis.
# In that case, set mlRho_autosomes_sexchromosomes to False and
# provide the path to the file with sex-chromosomal contigs/scaffolds
# above when running mlRho.
# with the reference genome ("sexchromosomes") when running mlRho.
mlRho_autosomes_sexchromosomes: False
#####
#################################################################
Expand Down Expand Up @@ -362,16 +358,23 @@ vcf_qual_repeat_filtering: False
#####
# Merge BCF files into a BCF file containing all samples and remove all
# sites that are not biallelic and with missing data across all samples
# up to a certain threshold as defined below.
# up to a certain threshold as defined below.
# If the path to a file with sex-chromosomal contigs/scaffolds is provided
# with the reference genome ("sexchromosomes"), these scaffolds/contigs are
# removed from the merged and filtered BCF file and all downstream analyses
# (optional).
# Extract 1) all historical and 2) all modern samples from the merged and
# filtered BCF file.
# Create a BED file of sites that remain after filtering across all samples
# to be used for downstream filtering of individual BCF files.
# Create a BED file of sites that remain after filtering and contig/scaffold
# removal across all samples to be used for downstream filtering of individual
# BCF files.
merge_vcfs_per_dataset: False

# Maximum allowed fraction of missing genotypes across all samples for a
# site to be kept in the BCF and BED file, to ensure that the same sites
# are compared between historical and modern samples.
# are compared between historical and modern samples. Has to be a floating
# point number between 0.0 (no missing data allowed) and 1.0 (sites with
# completely missing data are allowed).
f_missing: 0.1 # default: 0.1 (i.e. maximum 10% missing genotypes per site)
#####

Expand Down
61 changes: 32 additions & 29 deletions .test/config/config_pca_roh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,21 @@
# The file name will be reused by the pipeline and can have the file
# name extensions *.fasta, *.fa or *.fna.
ref_path: ".test/data/references/sumatran_rhino.fasta"

# OPTIONAL:
# Relative path (from the main pipeline directory) to file listing
# scaffolds/contigs linked to sex chromosomes (one scaffold/contig
# name per line).
# It is used to create BED files to run mlRho separately for autosomes
# and sex chromosomes or exclusively for autosomes, and/or to create
# autosome-only BCF files for PCA, ROH, snpEff and GERP analyses.
# It can also be used to specify any other contigs/scaffolds, e.g.
# unplaced or short scaffolds, for removal from the mlRho analysis
# and BCF files.
# Leave empty ("") if the identity of sex chromosomes is unknown and/or
# if the pipeline should be run on all scaffolds/contigs of the genome.
sexchromosomes: "" # for example, "config/chrX_candidate_scaffolds.txt"
#################################################################
#################################################################


Expand Down Expand Up @@ -278,26 +293,6 @@ CpG_samplenames: []
# Rules for BAM file processing for mlRho, and mlRho #
#################################################################

#####
# OPTIONAL:
# Generate BED files of autosomes and sex chromosomes for mlRho
# analyses, in case these should be analyzed separately from each
# other (see below for further options).
# Includes intersecting of the new chromosome-specific BED files
# with CpG- and repeat-masking BED files for downstream filtering.
autosome_sexchromosome_bed_files: False

# Relative path (from the main pipeline directory) to file listing
# scaffolds/contigs linked to sex chromosomes (one scaffold/contig
# name per line).
# Leave empty ("") if identity of sex chromosomes is unknown and/or
# if mlRho should be run on all scaffolds/contigs of the genome.
# Keep the path to the file when running the next step (mlRho)
# separately for autosomes and sex chromosomes or only for autosomes.
sexchromosomes: "" # for example, "config/chrX_candidate_scaffolds.txt"
#####


#####
# Run mlRho 2.9 on filtered BAM files.
# Automatically generates a PDF file with a plot of genome-wide
Expand All @@ -315,21 +310,22 @@ mlRho: False
# and/or mlRho should be run on all contigs/scaffolds,
# set mlRho_autosomes_sexchromosomes to False and do not provide
# a path to a text file with sex-chromosomal contigs/scaffolds
# above when running mlRho.
# with the reference genome ("sexchromosomes") when running mlRho.
#
# 2) If the identity of sex-chromosomal contigs/scaffolds is known,
# mlRho analyses can be run for autosomes and sex chromosomes
# separately from each other.
# In that case, set mlRho_autosomes_sexchromosomes to True and
# provide the path to the file with sex-chromosomal contigs/scaffolds
# above when running mlRho.
# with the reference genome ("sexchromosomes") when running mlRho.
#
# 3) If the identity of sex-chromosomal contigs/scaffolds is known,
# sex-chromosomal contigs/scaffolds can be entirely excluded from
# sex-chromosomal contigs/scaffolds (or other contigs/scaffolds such
# as unplaced or short scaffolds) can be entirely excluded from
# the analysis.
# In that case, set mlRho_autosomes_sexchromosomes to False and
# provide the path to the file with sex-chromosomal contigs/scaffolds
# above when running mlRho.
# with the reference genome ("sexchromosomes") when running mlRho.
mlRho_autosomes_sexchromosomes: False
#####
#################################################################
Expand Down Expand Up @@ -362,17 +358,24 @@ vcf_qual_repeat_filtering: False
#####
# Merge BCF files into a BCF file containing all samples and remove all
# sites that are not biallelic and with missing data across all samples
# up to a certain threshold as defined below.
# up to a certain threshold as defined below.
# If the path to a file with sex-chromosomal contigs/scaffolds is provided
# with the reference genome ("sexchromosomes"), these scaffolds/contigs are
# removed from the merged and filtered BCF file and all downstream analyses
# (optional).
# Extract 1) all historical and 2) all modern samples from the merged and
# filtered BCF file.
# Create a BED file of sites that remain after filtering across all samples
# to be used for downstream filtering of individual BCF files.
# Create a BED file of sites that remain after filtering and contig/scaffold
# removal across all samples to be used for downstream filtering of individual
# BCF files.
merge_vcfs_per_dataset: False

# Maximum allowed fraction of missing genotypes across all samples for a
# site to be kept in the BCF and BED file, to ensure that the same sites
# are compared between historical and modern samples.
f_missing: 0.5 # default: 0.1 (i.e. maximum 10% missing genotypes per site)
# are compared between historical and modern samples. Has to be a floating
# point number between 0.0 (no missing data allowed) and 1.0 (sites with
# completely missing data are allowed).
f_missing: 0.1 # default: 0.1 (i.e. maximum 10% missing genotypes per site)
#####

#################################################################
Expand Down
Loading

0 comments on commit 899949e

Please sign in to comment.