Skip to content

Commit

Permalink
Added more patient.yaml examples
Browse files Browse the repository at this point in the history
  • Loading branch information
metzgerpatrick committed Jun 17, 2022
1 parent 40a742c commit 574c93c
Show file tree
Hide file tree
Showing 3 changed files with 214 additions and 1 deletion.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ databases/*

# assets/input
assets/input/*
!assets/input/example_patient.yaml
!assets/input/example_WES_patient.yaml
!assets/input/example_TSO500_patient.yaml
!assets/input/.gitkeep

# assets/references
Expand Down
212 changes: 212 additions & 0 deletions assets/input/example_TSO500_patient.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
common:
author: MIRACUM-Pipe
center: Freiburg
memory: 150g
cpucores: 12
files:
# filenames of the input files as fastq.gz
panel:
tumor: MT21-28037-DNA_S7_L00 #MT21_19902-RNA_S12_L001_R1_001.fastq.gz
numberOfFiles: 4
entity: ESCA
RNA:
# folder contatining RNA fastq files for RNA fusions
folder: RNA
protocol: panel
# process and annotate germline file, allowed values: yes, no
# set to no if the germline sample should be used only to identify somatic variants
germline: yes
ucscServer: genome-euro-mysql.soe.ucsc.edu

sex: XY

general:
# minimum base quality to call a variant
minBaseQual: 28
# minimum
maf_cutoff: 0.01
# minimum variant allele / tumor frequency to call a variant
minVAF: 0.05

## Panel Parameters
panel:
samtools:
mpileup2snp:
#--min-coverage Minimum read depth at a position to make a call [8]
minCoverage: 20
#--min-reads2 Minimum supporting reads at a position to call variants [2]
minReads2: 2
#--min-freq-for-hom Minimum frequency to call homozygote [0.75]
minFreqForHom: 0.75
#--p-value Default p-value threshold for calling variants [99e-02]
pValue: 0.99
#--strand-filter Ignore variants with >90% support on one strand [1]
strandFilter: 1
#--min-var-freq Minimum variant allele frequency threshold [0.01] -> minVAF
#--min-avg-qual Minimum base quality at a position to count a read [15] -> minBaseQual

mpileup2indel:
#--min-coverage Minimum read depth at a position to make a call [8]
minCoverage: 20
#--min-reads2 Minimum supporting reads at a position to call variants [2]
minReads2: 2
#--min-freq-for-hom Minimum frequency to call homozygote [0.75]
minFreqForHom: 0.75
#--p-value Default p-value threshold for calling variants [99e-02]
pValue: 0.99
#--strand-filter Ignore variants with >90% support on one strand [1]
strandFilter: 1
#--min-var-freq Minimum variant allele frequency threshold [0.01] -> minVAF
#--min-avg-qual Minimum base quality at a position to count a read [15] -> minBaseQual

varscan:
fpfilter:
#--min-var-count Minimum number of variant-supporting reads [4]
minVarCount: 50
#--min-var-count-lc Minimum number of variant-supporting reads when depth below somaticPdepth [2]
minVarCountLC: 2
#--max-somatic-p Maximum somatic p-value [0.05]
maxSomaticP: 0.05
#--max-somatic-p-depth Depth required to test max somatic p-value [10]
maxSomaticPDepth: 10
#--min-ref-readpos Minimum average read position of ref-supporting reads [0.1]
minRefReadpos: 0.1
#--min-var-readpos Minimum average read position of var-supporting reads [0.1]
minVarReadpos: 0.1
#--min-ref-dist3 Minimum average distance to effective 3' end (ref) [0.1]
minRefDist3: 0.1
#--min-var-dist3 Minimum average distance to effective 3' end (var) [0.1]
minVarDist3: 0.1
#--min-strandedness Minimum fraction of variant reads from each strand [0.01]
minStrandedness: 0.01
#--min-strand-reads Minimum allele depth required to perform the strand tests [5]
minStrandReads: 5
#--max-basequal-diff Maximum average base quality diff (ref - var) [50]
maxBasequalDiff: 50
#--min-ref-avgrl Minimum average trimmed read length for ref allele [90]
minRefAVGRL: 90
#--min-var-avgrl Minimum average trimmed read length for var allele [90]
minVarAVGRL: 90
#--max-rl-diff Maximum average relative read length difference (ref - var) [0.25]
maxRlDiff: 0.25
#--max-ref-mmqs Maximum mismatch quality sum of reference-supporting reads [100]
maxRefMMQS: 100
#--max-var-mmqs Maximum mismatch quality sum of variant-supporting reads [100]
maxVarMMQS: 100
#--min-mmqs-diff Minimum average mismatch quality sum (var - ref) [0]
minMMQSDiff: 0
#--max-mmqs-diff Maximum average mismatch quality sum (var - ref) [50]
maxMMQSDiff: 50
#--min-ref-mapqual Minimum average mapping quality for ref allele [15]
minRefMapQual: 15
#--min-var-mapqual Minimum average mapping quality for var allele [15]
minVarMapQual: 15
#--max-mapqual-diff Maximum average mapping quality (ref - var) [50]
maxMapQualDiff: 50

mutect:
#--base-quality-score-threshold 28; Base qualities below this threshold will be reduced to the minimum (6) Default value: 18
#--callable-depth 8; Minimum depth to be considered callable for Mutect stats. Does not affect genotyping. Default value: 10
callableDepth: 8
#--intervals V6UTR.bed; One or more genomic intervals over which to operate This argument may be specified 0 or more times. Default value: null
#--min-base-quality-score 28; Minimum base quality required to consider a base for calling Default value: 10

filterMutectCalls:
#--contamination-estimate 0.5?; Estimate of contamination. Default value: 0.0 (evtl. tumor purity)
#--intervals V6UTR.bed; One or more genomic intervals over which to operate This argument may be specified 0 or more times. Default value: null
#--min-allele-fraction 0.05; Minimum allele fraction required Default value: 0.0 (VAF?)
#--min-median-base-quality 28; Minimum median base quality of alt reads Default value: 20

ControlFREEC:
# desired behavior in the ambiguous regions (poly-N or low mappability regions between two different copy number values)
# 0: the "unknown" region is attached to the "known" region on the right
# 1: make a separate fragment of this unknown region and then attaches it to the left or to the right region choosing the longer one
# 2: make a separate fragment of this unknown region and then attaches it to the left or to the right region but the ploidy copy number has a priority
# 3: make a separate fragment of this unknown region and then attaches it to the left or to the right region choosing the longer one but this known region should make at least half-size of the unknown region
# 4: make a separate fragment of this unknown region and do not assign any copy number to this region at all
breakPointType: 4
# positive value of threshold for segmentation of normalized profiles
breakPointThreshold: 1.2
# set to 1 or 2 to correct the Read Count (RC) for GC-content bias and low mappability even when you have a control sample
# 0: simply model "sample RC ~ Control RC"
# 1: normalize the sample and the control RC using GC-content and then calculate the ratio "Sample RC/contol RC"
# 2: model "sample RC ~ Control RC" bias, and then normalize for GC-content
forceGCcontentNormalization: 1
# intercept of polynomial
# 1 - with GC-content,
# 0 - with a control dataset
intercept: 1
# minimal number of consecutive windows to call a CNA
# 3 for WES and 1 for WGS
minCNAlength: 3
# maxThreads: 12 -> cpucores
# set TRUE for target resequencing data (e.g., exome-seq) to avoid false positive predictions due to non-uniform capture
noisyData: TRUE
# genome ploidy; In case of doubt, you can set different values and Control-FREEC will select the one that explains most observed CNAs
ploidy: 2
# set FALSE to avoid printing "-1" to the _ratio.txt files. Useful for exome-seq or targeted sequencing data
printNA: FALSE
# threshold on the minimal number of reads per window in the control sample
# Useful for exome-seq or targeted sequencing data
# recommended value >=50 for for exome data
readCountThreshold: 50
# step (used only when "window" is specified); do not use for exome sequencing (instead set "window=0")
step: 0
# explicit window size (higher priority than coefficientOfVariation);
# for whole genome sequencing: "window=50000"
# for whole exome sequencing: "window=0"
window: 0
# Use a mappability profile to correct read counts (in this case a mappability file must be provided with "gemMappabilityFile" )
uniqueMatch: TRUE
# set TRUE to correct for contamination by normal cells.
# If "contamination" is not provided, it will automatically evaluate the level of contamination
contaminationAdjustment: TRUE

fusions:
fusionGenes: TSO500_fusions.txt
amplification:
amplificationGenes: TSO500_amplifications.txt

annovar:
# define annotation databases, together with their type, used. The databases have to be installed with annovar
protocol: 'refGene,gnomad211_genome,avsnp150,clinvar_20210501,intervar_20180118,dbnsfp42a,cosmic_coding,cosmic_noncoding'
argop: 'g,f,f,f,f,f,f,f'

sequenza:
# define sequenza-utils parameter
# --window WINDOW; Window size used for binning the original seqz file. Default is 50.
window: 50
# if used for samples without mathcing normal as control a separate non-matching-normal file in bam format is needed;
# file should be stored in references/sequenza
nonMatchingNormal: non_matching_normal.bam
chromosomes: chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX

reference:
# define reference genome (located in references/Genome incl. indices)
genome: genome.fa
# file containing the length of the chromosomes (located in references/chromosomes)
length: hg19.len
# reference wig file for HRD calculation; if not present its generated (to be put/created in databases)
hrdRef: hg19all.wig.gz
# microsatellite sites for msisnesor-pro; if not present its generated (to be put/created in databases)
microsatelliteSites: microsatellite_sites_hg19
# database of known SNPs (located in databses/dbSNP)
dbSNP: snp150hg19.vcf.gz
# Mappability file needed for calling CNVs with ControlFREEC (located in reference/mappability)
mappability: out100m2_hg19.gem
sequencing: # all located in references/sequencing
# target region covered by the sequencer in .bed format
captureRegions: TSO500.bed
# file containing all the covered genes as HUGO Symbols
captureGenes: TSO500_Targets.txt
# target region in Mega bases covered
coveredRegion: 1.94
# target / capture region kit name
captureRegionName: TSO500
# target capture correlation factors for mutation signature analysis
captureCorFactors : targetCapture_cor_factors.rda
# covered exons by capture kit; basically intersection of exome with capture regions
# e.g. bedtools intersect -a targets.bed -b bed_exons_hg19.bed > exons_Routine.bed -u
coveredExons: exons_TSO500.bed
# positive list for genes in germline to be reported (e.g. by ACMG)
# actionableGenes: actionable_genes.txt
File renamed without changes.

0 comments on commit 574c93c

Please sign in to comment.