Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add bam-matcher #84

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,6 @@
[submodule "tools/fusioncatcher"]
path = tools/fusioncatcher
url = https://github.com/ndaniel/fusioncatcher
[submodule "tools/bam-matcher"]
path = tools/bam-matcher
url = https://github.com/buschlab/bam-matcher.git
7 changes: 6 additions & 1 deletion RScripts/Main.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,10 @@ if (protocol == "somatic" | protocol == "somaticGermline") {
path_input, sample,
"_vc.output.indel.NORMAL.SnpEff.vcf"
)
bammatcherfile <- paste0(
path_input, sample,
"_bam-matcher.txt"
)
outfile_circos_gd <- paste0(
path_output, sample,
"_GD_circos.pdf"
Expand Down Expand Up @@ -411,7 +415,8 @@ if (protocol == "somatic" | protocol == "somaticGermline") {
protocol = protocol,
sureselect = bed_file,
sureselect_type = sureselect_type,
msi_file = msi_file
msi_file = msi_file,
bammatcherfile = bammatcherfile
)
}
if (protocol == "panelTumor" | protocol == "tumorOnly") {
Expand Down
20 changes: 20 additions & 0 deletions RScripts/Report_tools.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,22 @@ keys <- function(
} else {
brca_helper <- "<1%"
}

bammatcher_helper <- paste0("Tumor/Normal passen nicht zusammen (", mutation_analysis_result$bam_matcher$FracCommon, "%).")
bammatcher_count = mutation_analysis_result$bam_matcher$Same + mutation_analysis_result$bam_matcher$Different
bammatcher_count_plus = bammatcher_count + max(mutation_analysis_result$bam_matcher$X1het.2sub, mutation_analysis_result$bam_matcher$X1sub.2het)
if (bammatcher_count <= 20) {
bammatcher_helper <- "Keine Bestimmung möglich."
} else if (bammatcher_count <= 100) {
if (bammatcher_count_plus >= 0.8) {
bammatcher_helper <- paste0("Tumor/Normal passen zusammen (", mutation_analysis_result$bam_matcher$FracCommon, "%).")
}
} else {
if (mutation_analysis_result$bam_matcher$FracCommon >= 0.8) {
bammatcher_helper <- paste0("Tumor/Normal passen zusammen (", mutation_analysis_result$bam_matcher$FracCommon, "%).")
}
}

}
if (protocol == "panelTumor" | protocol == "tumorOnly") {
if (mutation_analysis_result$msi < 20) {
Expand Down Expand Up @@ -84,6 +100,7 @@ keys <- function(
"Mikrosatelliten Status (Score)",
"HRD-Score (HRD-LoH|TAI|LST)",
"bioinformatischer Tumorzellgehalt (%)",
"bioinformatischer Check",
"Ploidie",
"Anzahl CN- Regionen",
paste0("Anzahl seltener Keimbahnmutationen (VAF > ", vaf, "%)")
Expand Down Expand Up @@ -119,6 +136,7 @@ keys <- function(
paste(msi_helper," (", mutation_analysis_result$msi, "%)", sep = ""),
cnv_analysis_results$hrd$score,
cnv_analysis_results$purity$purity*100,
bammatcher_helper,
cnv_analysis_results$purity$ploidy,
paste0(
round(
Expand Down Expand Up @@ -146,6 +164,7 @@ keys <- function(
"Mikrosatelliten Status (Score)",
"HRD-Score (HRD-LoH|TAI|LST)",
"bioinformatischer Tumorzellgehalt (%)",
"bioinformatischer Check",
"Ploidie",
"Anzahl CN- Regionen"
), Wert = c(
Expand Down Expand Up @@ -180,6 +199,7 @@ keys <- function(
paste(msi_helper," (", mutation_analysis_result$msi, "%)", sep = ""),
cnv_analysis_results$hrd$score,
cnv_analysis_results$purity$purity*100,
bammatcher_helper,
cnv_analysis_results$purity$ploidy,
paste0(
round(
Expand Down
14 changes: 12 additions & 2 deletions RScripts/mutationAnalysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ mutation_analysis <- function(
protocol,
sureselect,
sureselect_type,
msi_file
msi_file,
bammatcherfile
) {
#' Mutation Analysis
#'
Expand All @@ -35,6 +36,7 @@ mutation_analysis <- function(
#' @param targets_txt string. Path of the targets.txt file provided by the Capture Kit manufracturer
#' @param protocol string. Type of protocol used for analyses
#' @param sureselect string. Path to capture region bed file
#' @param bammatcherfile string. Patho to the output file of bam-matcher
#'
#' @return returns list of
#' @return ts_og dataframe. Table of mutations in tumorsuppressor and
Expand All @@ -52,6 +54,7 @@ mutation_analysis <- function(
#' @return som_mut_tab dataframe. Table of somatic mutations
#' @return table_loh_mutations dataframe. Table of LoH mutations
#' @return all_mutations dataframe. Table of all mutations
#' @return bam_matcher. Table of bam-matcher result
#'
#' @note The following files are produced by mutation_analysis:
#' @note - "MutationTable.txt"
Expand Down Expand Up @@ -158,6 +161,12 @@ mutation_analysis <- function(
importantpws <- imp_pws(check_mat, all_mut$all_muts)
}
msi <- msi(msi_file)

bam_matcher <- data.frame()
if (protocol == "somaticGermline" || protocol == "somatic") {
bam_matcher <- read.table(bammatcherfile, header = T, comment.char = "", sep = "\t")
}

return(list(mut_tab = mutation_table,
ts_og = tbl$ts_og_table,
go = NULL, reactome = NULL,
Expand All @@ -170,5 +179,6 @@ mutation_analysis <- function(
som_mut_tab = tbl$sm_table,
table_loh_mutations = tbl$lm_table,
all_mutations = all_mut$all_muts,
msi = msi))
msi = msi),
bam_matcher = bam_matcher)
}
2 changes: 1 addition & 1 deletion debian/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ function install_R()
echo "deb http://cloud.r-project.org/bin/linux/debian buster-cran40/" >> /etc/apt/sources.list && \
apt-key add "/opt/MIRACUM-Pipe/debian/r_key.asc"
#apt-key adv --keyserver keyserver.ubuntu.com --recv-key B8F25A8A73EACF41
apt-get update && apt-get install -y --no-install-recommends -t buster-cran40 r-base-dev
apt-get update && apt-get install -y --no-install-recommends -t buster-cran40 r-base-dev=4.2.3-1~bustercran.0 r-base-core=4.2.3-1~bustercran.0
R CMD javareconf
}

Expand Down
3 changes: 3 additions & 0 deletions make_vc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ readonly recalbamTD=${DIR_WES}/${NameTD}_output.sort.filtered.rmdup.realigned.fi
readonly snpvcf=${DIR_WES}/${NameD}.output.snp.vcf
readonly indelvcf=${DIR_WES}/${NameD}.output.indel.vcf
readonly MSI_OUTPUT=${DIR_WES}/${NameD}_MSI
readonly BAMMATCHER_OUTPUT=${DIR_WES}/${CFG_CASE}_${PARAM_DIR_PATIENT}_bam-matcher.txt

${BIN_MPILEUP} --adjust-MQ "${CFG_SAMTOOLS_MPILEUP_ADJUSTMQ}" --min-MQ "${CFG_SAMTOOLS_MPILEUP_MINMQ}" --min-BQ "${CFG_GENERAL_MINBASEQUAL}" --max-depth "${CFG_SAMTOOLS_MPILEUP_MAXDEPTH}" -f "${FILE_GENOME}" -l "${CFG_REFERENCE_CAPTUREREGIONS}" "${recalbamGD}" "${recalbamTD}" | ${BIN_SOMATIC} --output-snp "${snpvcf}" --output-indel "${indelvcf}" \
--min-coverage "${CFG_VARSCAN_SOMATIC_MINCOVERAGE}" --tumor-purity "${CFG_VARSCAN_SOMATIC_TUMORPURITY}" \
Expand Down Expand Up @@ -218,4 +219,6 @@ fi

${MSISENSOR_PRO} -d ${MICROSATELLITE_SITES} -n "${recalbamGD}" -t "${recalbamTD}" -o "${MSI_OUTPUT}"

${BAM_MATCHER} --bam1 ${recalbamGD} --bam2 ${recalbamTD} --output "${BAMMATCHER_OUTPUT}"

#eo VC
5 changes: 5 additions & 0 deletions programs.cfg.sh
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,9 @@ readonly SEQUENZA_WINDOW=$(get_config_value sequenza.window "${PARAM_DIR_PATIENT
readonly SEQUENZA_NON_MATCHING_NORMAL="${DIR_REF}/sequenza/$(get_config_value sequenza.nonMatchingNormal "${PARAM_DIR_PATIENT}")"
readonly SEQUENZA_CHROMOSOMES=$(get_config_value sequenza.chromosomes "${PARAM_DIR_PATIENT}")

# BAM-Matcher
readonly BAM_MATCHER="python3 /opt/MIRACUM-Pipe/tools/bam-matcher/bam-matcher.py --reference ${FILE_GENOME} --short-output --do-not-cache"

# export parameters
export CFG_AUTHOR
export CFG_CENTER
Expand Down Expand Up @@ -632,3 +635,5 @@ export SEQUENZA_UTILS
export SEQUENZA_WINDOW
export SEQUENZA_NON_MATCHING_NORMAL
export SEQUENZA_CHROMOSOMES

export BAM_MATCHER
1 change: 1 addition & 0 deletions tools/bam-matcher
Submodule bam-matcher added at cbfd25
48 changes: 48 additions & 0 deletions tools/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -205,3 +205,51 @@ cd ${DIR_SCRIPT}/msisensor-pro
############
cd /opt/MIRACUM-Pipe/databases
agfusion download -g hg38

###############
# bam-matcher #
###############
cd ${DIR_SCRIPT}/bam-matcher
pip3 install --upgrade numpy
pip3 install -r requirements.txt

cat >"${DIR_SCRIPT}"/bam-matcher/bam-matcher.conf <<EOI
[VariantCallers]
caller: gatk4
gatk3: /opt/MIRACUM-Pipe/tools/gatk/GenomeAnalysisTK.jar
gatk4: /opt/MIRACUM-Pipe/tools/gatk4/gatk
freebayes: freebayes
samtools: samtools
varscan: /opt/MIRACUM-Pipe/tools/bam-matcher/VarScan.jar
java: java

[ScriptOptions]
DP_threshold: 15
number_of_SNPs:
fast_freebayes: True
VCF_file: /opt/MIRACUM-Pipe/tools/bam-matcher/1kg.exome.highAF.1511.vcf


[VariantCallerParameters]
GATK_MEM: 4
GATK_nt: 1
VARSCAN_MEM: 4

[GenomeReference]
REFERENCE: /opt/MIRACUM-Pipe/assets/references/genome/genome.fa
REF_ALTERNATE:
CHROM_MAP:

[BatchOperations]
CACHE_DIR: ${DIR_TMP}
[Miscellaneous]
EOI

awk '{if($0 !~ /^#/) print "chr"$0; else print $0}' ${DIR_SCRIPT}/bam-matcher/1kg.exome.highAF.1511.vcf > ${DIR_SCRIPT}/bam-matcher/tmp.vcf
mv ${DIR_SCRIPT}/bam-matcher/tmp.vcf ${DIR_SCRIPT}/bam-matcher/1kg.exome.highAF.1511.vcf

awk '{if($0 !~ /^#/) print "chr"$0; else print $0}' ${DIR_SCRIPT}/bam-matcher/1kg.exome.highAF.3680.vcf > ${DIR_SCRIPT}/bam-matcher/tmp.vcf
mv ${DIR_SCRIPT}/bam-matcher/tmp.vcf ${DIR_SCRIPT}/bam-matcher/1kg.exome.highAF.3680.vcf

awk '{if($0 !~ /^#/) print "chr"$0; else print $0}' ${DIR_SCRIPT}/bam-matcher/1kg.exome.highAF.7550.vcf > ${DIR_SCRIPT}/bam-matcher/tmp.vcf
mv ${DIR_SCRIPT}/bam-matcher/tmp.vcf ${DIR_SCRIPT}/bam-matcher/1kg.exome.highAF.7550.vcf