From 3cc0410a3e3cb0248429114aed160544d284be82 Mon Sep 17 00:00:00 2001 From: Niklas Reimer Date: Tue, 10 Oct 2023 11:13:26 +0200 Subject: [PATCH 1/8] add bam-matcher submodule --- .gitmodules | 3 +++ tools/bam-matcher | 1 + 2 files changed, 4 insertions(+) create mode 160000 tools/bam-matcher diff --git a/.gitmodules b/.gitmodules index b2f6474..cecef81 100644 --- a/.gitmodules +++ b/.gitmodules @@ -28,3 +28,6 @@ [submodule "tools/fusioncatcher"] path = tools/fusioncatcher url = https://github.com/ndaniel/fusioncatcher +[submodule "tools/bam-matcher"] + path = tools/bam-matcher + url = https://github.com/buschlab/bam-matcher.git diff --git a/tools/bam-matcher b/tools/bam-matcher new file mode 160000 index 0000000..cbfd256 --- /dev/null +++ b/tools/bam-matcher @@ -0,0 +1 @@ +Subproject commit cbfd2564ed0df424d61215115ec8d18ec13db317 From e6260b4b719cefb24f619a803c9d6860d4781313 Mon Sep 17 00:00:00 2001 From: Niklas Reimer Date: Wed, 18 Oct 2023 22:50:54 +0200 Subject: [PATCH 2/8] add bam-matcher to pipeline --- RScripts/Main.R | 7 +++++- RScripts/mutationAnalysis.R | 14 +++++++++-- debian/setup.sh | 2 +- make_vc.sh | 3 +++ programs.cfg.sh | 5 ++++ tools/install.sh | 46 +++++++++++++++++++++++++++++++++++++ 6 files changed, 73 insertions(+), 4 deletions(-) diff --git a/RScripts/Main.R b/RScripts/Main.R index 1ea6e09..a1c4c8d 100644 --- a/RScripts/Main.R +++ b/RScripts/Main.R @@ -154,6 +154,10 @@ if (protocol == "somatic" | protocol == "somaticGermline") { path_input, sample, "_vc.output.indel.NORMAL.SnpEff.vcf" ) + bammatcherfile <- paste0( + path_input, sample, + "_bam-matcher.txt" + ) outfile_circos_gd <- paste0( path_output, sample, "_GD_circos.pdf" @@ -411,7 +415,8 @@ if (protocol == "somatic" | protocol == "somaticGermline") { protocol = protocol, sureselect = bed_file, sureselect_type = sureselect_type, - msi_file = msi_file + msi_file = msi_file, + bammatcherfile = bammatcherfile ) } if (protocol == "panelTumor" | protocol == "tumorOnly") { diff --git a/RScripts/mutationAnalysis.R b/RScripts/mutationAnalysis.R index 785ee83..7637add 100644 --- a/RScripts/mutationAnalysis.R +++ b/RScripts/mutationAnalysis.R @@ -14,7 +14,8 @@ mutation_analysis <- function( protocol, sureselect, sureselect_type, - msi_file + msi_file, + bammatcherfile ) { #' Mutation Analysis #' @@ -35,6 +36,7 @@ mutation_analysis <- function( #' @param targets_txt string. Path of the targets.txt file provided by the Capture Kit manufracturer #' @param protocol string. Type of protocol used for analyses #' @param sureselect string. Path to capture region bed file + #' @param bammatcherfile string. Patho to the output file of bam-matcher #' #' @return returns list of #' @return ts_og dataframe. Table of mutations in tumorsuppressor and @@ -52,6 +54,7 @@ mutation_analysis <- function( #' @return som_mut_tab dataframe. Table of somatic mutations #' @return table_loh_mutations dataframe. Table of LoH mutations #' @return all_mutations dataframe. Table of all mutations + #' @return bam_matcher. Table of bam-matcher result #' #' @note The following files are produced by mutation_analysis: #' @note - "MutationTable.txt" @@ -158,6 +161,12 @@ mutation_analysis <- function( importantpws <- imp_pws(check_mat, all_mut$all_muts) } msi <- msi(msi_file) + + bam_matcher <- data.frame() + if (protocol == "somaticGermline" || protocol == "somatic") { + bam_matcher <- read.table(bammatcherfile, header = T, comment.char = "", sep = "\t") + } + return(list(mut_tab = mutation_table, ts_og = tbl$ts_og_table, go = NULL, reactome = NULL, @@ -170,5 +179,6 @@ mutation_analysis <- function( som_mut_tab = tbl$sm_table, table_loh_mutations = tbl$lm_table, all_mutations = all_mut$all_muts, - msi = msi)) + msi = msi), + bam_matcher = bam_matcher) } diff --git a/debian/setup.sh b/debian/setup.sh index 70d843d..63e3025 100755 --- a/debian/setup.sh +++ b/debian/setup.sh @@ -33,7 +33,7 @@ function install_R() echo "deb http://cloud.r-project.org/bin/linux/debian buster-cran40/" >> /etc/apt/sources.list && \ apt-key add "/opt/MIRACUM-Pipe/debian/r_key.asc" #apt-key adv --keyserver keyserver.ubuntu.com --recv-key B8F25A8A73EACF41 - apt-get update && apt-get install -y --no-install-recommends -t buster-cran40 r-base-dev + apt-get update && apt-get install -y --no-install-recommends -t buster-cran40 r-base-dev=4.2.3-1~bustercran.0 r-base-core=4.2.3-1~bustercran.0 R CMD javareconf } diff --git a/make_vc.sh b/make_vc.sh index 826c461..180fe62 100755 --- a/make_vc.sh +++ b/make_vc.sh @@ -86,6 +86,7 @@ readonly recalbamTD=${DIR_WES}/${NameTD}_output.sort.filtered.rmdup.realigned.fi readonly snpvcf=${DIR_WES}/${NameD}.output.snp.vcf readonly indelvcf=${DIR_WES}/${NameD}.output.indel.vcf readonly MSI_OUTPUT=${DIR_WES}/${NameD}_MSI +readonly BAMMATCHER_OUTPUT=${DIR_WES}/${CFG_CASE}_${PARAM_DIR_PATIENT}_bam-matcher.txt ${BIN_MPILEUP} --adjust-MQ "${CFG_SAMTOOLS_MPILEUP_ADJUSTMQ}" --min-MQ "${CFG_SAMTOOLS_MPILEUP_MINMQ}" --min-BQ "${CFG_GENERAL_MINBASEQUAL}" --max-depth "${CFG_SAMTOOLS_MPILEUP_MAXDEPTH}" -f "${FILE_GENOME}" -l "${CFG_REFERENCE_CAPTUREREGIONS}" "${recalbamGD}" "${recalbamTD}" | ${BIN_SOMATIC} --output-snp "${snpvcf}" --output-indel "${indelvcf}" \ --min-coverage "${CFG_VARSCAN_SOMATIC_MINCOVERAGE}" --tumor-purity "${CFG_VARSCAN_SOMATIC_TUMORPURITY}" \ @@ -218,4 +219,6 @@ fi ${MSISENSOR_PRO} -d ${MICROSATELLITE_SITES} -n "${recalbamGD}" -t "${recalbamTD}" -o "${MSI_OUTPUT}" +${BAM_MATCHER} --bam1 ${recalbamGD} --bam2 ${recalbamTD} --output "${BAMMATCHER_OUTPUT}" + #eo VC \ No newline at end of file diff --git a/programs.cfg.sh b/programs.cfg.sh index 2db703b..b1ed838 100755 --- a/programs.cfg.sh +++ b/programs.cfg.sh @@ -369,6 +369,9 @@ readonly SEQUENZA_WINDOW=$(get_config_value sequenza.window "${PARAM_DIR_PATIENT readonly SEQUENZA_NON_MATCHING_NORMAL="${DIR_REF}/sequenza/$(get_config_value sequenza.nonMatchingNormal "${PARAM_DIR_PATIENT}")" readonly SEQUENZA_CHROMOSOMES=$(get_config_value sequenza.chromosomes "${PARAM_DIR_PATIENT}") +# BAM-Matcher +readonly BAM_MATCHER=python3 /opt/MIRACUM-Pipe/tools/bam-matcher/bam-matcher.py --reference ${FILE_GENOME} --short-output + # export parameters export CFG_AUTHOR export CFG_CENTER @@ -632,3 +635,5 @@ export SEQUENZA_UTILS export SEQUENZA_WINDOW export SEQUENZA_NON_MATCHING_NORMAL export SEQUENZA_CHROMOSOMES + +export BAM_MATCHER \ No newline at end of file diff --git a/tools/install.sh b/tools/install.sh index f5f9e39..3a3db55 100755 --- a/tools/install.sh +++ b/tools/install.sh @@ -205,3 +205,49 @@ cd ${DIR_SCRIPT}/msisensor-pro ############ cd /opt/MIRACUM-Pipe/databases agfusion download -g hg38 + +############### +# bam-matcher # +############### +cd ${DIR_SCRIPT}/bam-matcher +pip3 install -r requirements.txt + +cat >"${DIR_SCRIPT}"/bam-matcher/bam-matcher.conf < ${DIR_SCRIPT}/bam-matcher/tmp.vcf +mv ${DIR_SCRIPT}/bam-matcher/tmp.vcf ${DIR_SCRIPT}/bam-matcher/1kg.exome.highAF.1511.vcf + +awk '{if($0 !~ /^#/) print "chr"$0; else print $0}' ${DIR_SCRIPT}/bam-matcher/1kg.exome.highAF.3680.vcf > ${DIR_SCRIPT}/bam-matcher/tmp.vcf +mv ${DIR_SCRIPT}/bam-matcher/tmp.vcf ${DIR_SCRIPT}/bam-matcher/1kg.exome.highAF.3680.vcf + +awk '{if($0 !~ /^#/) print "chr"$0; else print $0}' ${DIR_SCRIPT}/bam-matcher/1kg.exome.highAF.7550.vcf > ${DIR_SCRIPT}/bam-matcher/tmp.vcf +mv ${DIR_SCRIPT}/bam-matcher/tmp.vcf ${DIR_SCRIPT}/bam-matcher/1kg.exome.highAF.7550.vcf \ No newline at end of file From cd5bf694a780c0be6009df81fe4ef34212f1434f Mon Sep 17 00:00:00 2001 From: Niklas Reimer Date: Fri, 20 Oct 2023 13:55:50 +0200 Subject: [PATCH 3/8] fix numpy --- tools/install.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/install.sh b/tools/install.sh index 3a3db55..2699cf6 100755 --- a/tools/install.sh +++ b/tools/install.sh @@ -210,6 +210,7 @@ agfusion download -g hg38 # bam-matcher # ############### cd ${DIR_SCRIPT}/bam-matcher +pip3 install --upgrade numpy pip3 install -r requirements.txt cat >"${DIR_SCRIPT}"/bam-matcher/bam-matcher.conf < Date: Fri, 20 Oct 2023 13:56:00 +0200 Subject: [PATCH 4/8] add bam-matcher to report --- RScripts/Report_tools.R | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/RScripts/Report_tools.R b/RScripts/Report_tools.R index f40ff0e..26de3af 100644 --- a/RScripts/Report_tools.R +++ b/RScripts/Report_tools.R @@ -36,6 +36,22 @@ keys <- function( } else { brca_helper <- "<1%" } + + bammatcher_helper <- paste0("Tumor/Normal passen nicht zusammen (", mutation_analysis_result$bam_matcher$FracCommon, "%).") + bammatcher_count = mutation_analysis_result$bam_matcher$Same + mutation_analysis_result$bam_matcher$Different + bammatcher_count_plus = bammatcher_count + max(mutation_analysis_result$bam_matcher$X1het.2sub, mutation_analysis_result$bam_matcher$X1sub.2het) + if (bammatcher_count <= 20) { + bammatcher_helper <- "Keine Bestimmung möglich." + } else if (bammatcher_count <= 100) { + if (bammatcher_count_plus >= 0.8) { + bammatcher_helper <- paste0("Tumor/Normal passen zusammen (", mutation_analysis_result$bam_matcher$FracCommon, "%).") + } + } else { + if (mutation_analysis_result$bam_matcher$FracCommon >= 0.8) { + bammatcher_helper <- paste0("Tumor/Normal passen zusammen (", mutation_analysis_result$bam_matcher$FracCommon, "%).") + } + } + } if (protocol == "panelTumor" | protocol == "tumorOnly") { if (mutation_analysis_result$msi < 20) { @@ -84,6 +100,7 @@ keys <- function( "Mikrosatelliten Status (Score)", "HRD-Score (HRD-LoH|TAI|LST)", "bioinformatischer Tumorzellgehalt (%)", + "bioinformatischer Check", "Ploidie", "Anzahl CN- Regionen", paste0("Anzahl seltener Keimbahnmutationen (VAF > ", vaf, "%)") @@ -119,6 +136,7 @@ keys <- function( paste(msi_helper," (", mutation_analysis_result$msi, "%)", sep = ""), cnv_analysis_results$hrd$score, cnv_analysis_results$purity$purity*100, + bammatcher_helper, cnv_analysis_results$purity$ploidy, paste0( round( @@ -146,6 +164,7 @@ keys <- function( "Mikrosatelliten Status (Score)", "HRD-Score (HRD-LoH|TAI|LST)", "bioinformatischer Tumorzellgehalt (%)", + "bioinformatischer Check", "Ploidie", "Anzahl CN- Regionen" ), Wert = c( @@ -180,6 +199,7 @@ keys <- function( paste(msi_helper," (", mutation_analysis_result$msi, "%)", sep = ""), cnv_analysis_results$hrd$score, cnv_analysis_results$purity$purity*100, + bammatcher_helper, cnv_analysis_results$purity$ploidy, paste0( round( From 1252a0608927aa49e64f7bed4786d708abb7e382 Mon Sep 17 00:00:00 2001 From: Niklas Reimer Date: Tue, 24 Oct 2023 11:14:38 +0200 Subject: [PATCH 5/8] fix bam-matcher call --- programs.cfg.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs.cfg.sh b/programs.cfg.sh index b1ed838..b1947cb 100755 --- a/programs.cfg.sh +++ b/programs.cfg.sh @@ -370,7 +370,7 @@ readonly SEQUENZA_NON_MATCHING_NORMAL="${DIR_REF}/sequenza/$(get_config_value se readonly SEQUENZA_CHROMOSOMES=$(get_config_value sequenza.chromosomes "${PARAM_DIR_PATIENT}") # BAM-Matcher -readonly BAM_MATCHER=python3 /opt/MIRACUM-Pipe/tools/bam-matcher/bam-matcher.py --reference ${FILE_GENOME} --short-output +readonly BAM_MATCHER="python3 /opt/MIRACUM-Pipe/tools/bam-matcher/bam-matcher.py --reference ${FILE_GENOME} --short-output" # export parameters export CFG_AUTHOR From decfce5689552a955913175601a3d2f31bff8e12 Mon Sep 17 00:00:00 2001 From: Niklas Reimer Date: Fri, 27 Oct 2023 00:13:03 +0200 Subject: [PATCH 6/8] disable bam-matcher cache --- programs.cfg.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs.cfg.sh b/programs.cfg.sh index b1947cb..f4eabd1 100755 --- a/programs.cfg.sh +++ b/programs.cfg.sh @@ -370,7 +370,7 @@ readonly SEQUENZA_NON_MATCHING_NORMAL="${DIR_REF}/sequenza/$(get_config_value se readonly SEQUENZA_CHROMOSOMES=$(get_config_value sequenza.chromosomes "${PARAM_DIR_PATIENT}") # BAM-Matcher -readonly BAM_MATCHER="python3 /opt/MIRACUM-Pipe/tools/bam-matcher/bam-matcher.py --reference ${FILE_GENOME} --short-output" +readonly BAM_MATCHER="python3 /opt/MIRACUM-Pipe/tools/bam-matcher/bam-matcher.py --reference ${FILE_GENOME} --short-output --do-not-cache" # export parameters export CFG_AUTHOR From 09cfd573436d91e7f1dc490a12a5d955cd8a3d71 Mon Sep 17 00:00:00 2001 From: Niklas Reimer Date: Thu, 26 Oct 2023 21:28:45 +0200 Subject: [PATCH 7/8] set absolute paths for bam-matcher --- tools/install.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/install.sh b/tools/install.sh index 2699cf6..3f40b77 100755 --- a/tools/install.sh +++ b/tools/install.sh @@ -216,18 +216,18 @@ pip3 install -r requirements.txt cat >"${DIR_SCRIPT}"/bam-matcher/bam-matcher.conf < Date: Wed, 25 Oct 2023 14:22:17 +0200 Subject: [PATCH 8/8] fix missing param vor bam-matcher --- tools/install.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/install.sh b/tools/install.sh index 3f40b77..b0693fd 100755 --- a/tools/install.sh +++ b/tools/install.sh @@ -231,8 +231,9 @@ VCF_file: /opt/MIRACUM-Pipe/tools/bam-matcher/1kg.exome.highAF.1511.vcf [VariantCallerParameters] -GATK_MEM: 4 -GATK_nt: 1 +GATK_MEM: 4 +GATK_nt: 1 +VARSCAN_MEM: 4 [GenomeReference] REFERENCE: /opt/MIRACUM-Pipe/assets/references/genome/genome.fa