diff --git a/.gitmodules b/.gitmodules index b2f6474..cecef81 100644 --- a/.gitmodules +++ b/.gitmodules @@ -28,3 +28,6 @@ [submodule "tools/fusioncatcher"] path = tools/fusioncatcher url = https://github.com/ndaniel/fusioncatcher +[submodule "tools/bam-matcher"] + path = tools/bam-matcher + url = https://github.com/buschlab/bam-matcher.git diff --git a/RScripts/Main.R b/RScripts/Main.R index 1ea6e09..a1c4c8d 100644 --- a/RScripts/Main.R +++ b/RScripts/Main.R @@ -154,6 +154,10 @@ if (protocol == "somatic" | protocol == "somaticGermline") { path_input, sample, "_vc.output.indel.NORMAL.SnpEff.vcf" ) + bammatcherfile <- paste0( + path_input, sample, + "_bam-matcher.txt" + ) outfile_circos_gd <- paste0( path_output, sample, "_GD_circos.pdf" @@ -411,7 +415,8 @@ if (protocol == "somatic" | protocol == "somaticGermline") { protocol = protocol, sureselect = bed_file, sureselect_type = sureselect_type, - msi_file = msi_file + msi_file = msi_file, + bammatcherfile = bammatcherfile ) } if (protocol == "panelTumor" | protocol == "tumorOnly") { diff --git a/RScripts/Report_tools.R b/RScripts/Report_tools.R index f40ff0e..26de3af 100644 --- a/RScripts/Report_tools.R +++ b/RScripts/Report_tools.R @@ -36,6 +36,22 @@ keys <- function( } else { brca_helper <- "<1%" } + + bammatcher_helper <- paste0("Tumor/Normal passen nicht zusammen (", mutation_analysis_result$bam_matcher$FracCommon, "%).") + bammatcher_count = mutation_analysis_result$bam_matcher$Same + mutation_analysis_result$bam_matcher$Different + bammatcher_count_plus = bammatcher_count + max(mutation_analysis_result$bam_matcher$X1het.2sub, mutation_analysis_result$bam_matcher$X1sub.2het) + if (bammatcher_count <= 20) { + bammatcher_helper <- "Keine Bestimmung möglich." + } else if (bammatcher_count <= 100) { + if (bammatcher_count_plus >= 0.8) { + bammatcher_helper <- paste0("Tumor/Normal passen zusammen (", mutation_analysis_result$bam_matcher$FracCommon, "%).") + } + } else { + if (mutation_analysis_result$bam_matcher$FracCommon >= 0.8) { + bammatcher_helper <- paste0("Tumor/Normal passen zusammen (", mutation_analysis_result$bam_matcher$FracCommon, "%).") + } + } + } if (protocol == "panelTumor" | protocol == "tumorOnly") { if (mutation_analysis_result$msi < 20) { @@ -84,6 +100,7 @@ keys <- function( "Mikrosatelliten Status (Score)", "HRD-Score (HRD-LoH|TAI|LST)", "bioinformatischer Tumorzellgehalt (%)", + "bioinformatischer Check", "Ploidie", "Anzahl CN- Regionen", paste0("Anzahl seltener Keimbahnmutationen (VAF > ", vaf, "%)") @@ -119,6 +136,7 @@ keys <- function( paste(msi_helper," (", mutation_analysis_result$msi, "%)", sep = ""), cnv_analysis_results$hrd$score, cnv_analysis_results$purity$purity*100, + bammatcher_helper, cnv_analysis_results$purity$ploidy, paste0( round( @@ -146,6 +164,7 @@ keys <- function( "Mikrosatelliten Status (Score)", "HRD-Score (HRD-LoH|TAI|LST)", "bioinformatischer Tumorzellgehalt (%)", + "bioinformatischer Check", "Ploidie", "Anzahl CN- Regionen" ), Wert = c( @@ -180,6 +199,7 @@ keys <- function( paste(msi_helper," (", mutation_analysis_result$msi, "%)", sep = ""), cnv_analysis_results$hrd$score, cnv_analysis_results$purity$purity*100, + bammatcher_helper, cnv_analysis_results$purity$ploidy, paste0( round( diff --git a/RScripts/mutationAnalysis.R b/RScripts/mutationAnalysis.R index 785ee83..7637add 100644 --- a/RScripts/mutationAnalysis.R +++ b/RScripts/mutationAnalysis.R @@ -14,7 +14,8 @@ mutation_analysis <- function( protocol, sureselect, sureselect_type, - msi_file + msi_file, + bammatcherfile ) { #' Mutation Analysis #' @@ -35,6 +36,7 @@ mutation_analysis <- function( #' @param targets_txt string. Path of the targets.txt file provided by the Capture Kit manufracturer #' @param protocol string. Type of protocol used for analyses #' @param sureselect string. Path to capture region bed file + #' @param bammatcherfile string. Patho to the output file of bam-matcher #' #' @return returns list of #' @return ts_og dataframe. Table of mutations in tumorsuppressor and @@ -52,6 +54,7 @@ mutation_analysis <- function( #' @return som_mut_tab dataframe. Table of somatic mutations #' @return table_loh_mutations dataframe. Table of LoH mutations #' @return all_mutations dataframe. Table of all mutations + #' @return bam_matcher. Table of bam-matcher result #' #' @note The following files are produced by mutation_analysis: #' @note - "MutationTable.txt" @@ -158,6 +161,12 @@ mutation_analysis <- function( importantpws <- imp_pws(check_mat, all_mut$all_muts) } msi <- msi(msi_file) + + bam_matcher <- data.frame() + if (protocol == "somaticGermline" || protocol == "somatic") { + bam_matcher <- read.table(bammatcherfile, header = T, comment.char = "", sep = "\t") + } + return(list(mut_tab = mutation_table, ts_og = tbl$ts_og_table, go = NULL, reactome = NULL, @@ -170,5 +179,6 @@ mutation_analysis <- function( som_mut_tab = tbl$sm_table, table_loh_mutations = tbl$lm_table, all_mutations = all_mut$all_muts, - msi = msi)) + msi = msi), + bam_matcher = bam_matcher) } diff --git a/debian/setup.sh b/debian/setup.sh index 70d843d..63e3025 100755 --- a/debian/setup.sh +++ b/debian/setup.sh @@ -33,7 +33,7 @@ function install_R() echo "deb http://cloud.r-project.org/bin/linux/debian buster-cran40/" >> /etc/apt/sources.list && \ apt-key add "/opt/MIRACUM-Pipe/debian/r_key.asc" #apt-key adv --keyserver keyserver.ubuntu.com --recv-key B8F25A8A73EACF41 - apt-get update && apt-get install -y --no-install-recommends -t buster-cran40 r-base-dev + apt-get update && apt-get install -y --no-install-recommends -t buster-cran40 r-base-dev=4.2.3-1~bustercran.0 r-base-core=4.2.3-1~bustercran.0 R CMD javareconf } diff --git a/make_vc.sh b/make_vc.sh index 826c461..180fe62 100755 --- a/make_vc.sh +++ b/make_vc.sh @@ -86,6 +86,7 @@ readonly recalbamTD=${DIR_WES}/${NameTD}_output.sort.filtered.rmdup.realigned.fi readonly snpvcf=${DIR_WES}/${NameD}.output.snp.vcf readonly indelvcf=${DIR_WES}/${NameD}.output.indel.vcf readonly MSI_OUTPUT=${DIR_WES}/${NameD}_MSI +readonly BAMMATCHER_OUTPUT=${DIR_WES}/${CFG_CASE}_${PARAM_DIR_PATIENT}_bam-matcher.txt ${BIN_MPILEUP} --adjust-MQ "${CFG_SAMTOOLS_MPILEUP_ADJUSTMQ}" --min-MQ "${CFG_SAMTOOLS_MPILEUP_MINMQ}" --min-BQ "${CFG_GENERAL_MINBASEQUAL}" --max-depth "${CFG_SAMTOOLS_MPILEUP_MAXDEPTH}" -f "${FILE_GENOME}" -l "${CFG_REFERENCE_CAPTUREREGIONS}" "${recalbamGD}" "${recalbamTD}" | ${BIN_SOMATIC} --output-snp "${snpvcf}" --output-indel "${indelvcf}" \ --min-coverage "${CFG_VARSCAN_SOMATIC_MINCOVERAGE}" --tumor-purity "${CFG_VARSCAN_SOMATIC_TUMORPURITY}" \ @@ -218,4 +219,6 @@ fi ${MSISENSOR_PRO} -d ${MICROSATELLITE_SITES} -n "${recalbamGD}" -t "${recalbamTD}" -o "${MSI_OUTPUT}" +${BAM_MATCHER} --bam1 ${recalbamGD} --bam2 ${recalbamTD} --output "${BAMMATCHER_OUTPUT}" + #eo VC \ No newline at end of file diff --git a/programs.cfg.sh b/programs.cfg.sh index 2db703b..f4eabd1 100755 --- a/programs.cfg.sh +++ b/programs.cfg.sh @@ -369,6 +369,9 @@ readonly SEQUENZA_WINDOW=$(get_config_value sequenza.window "${PARAM_DIR_PATIENT readonly SEQUENZA_NON_MATCHING_NORMAL="${DIR_REF}/sequenza/$(get_config_value sequenza.nonMatchingNormal "${PARAM_DIR_PATIENT}")" readonly SEQUENZA_CHROMOSOMES=$(get_config_value sequenza.chromosomes "${PARAM_DIR_PATIENT}") +# BAM-Matcher +readonly BAM_MATCHER="python3 /opt/MIRACUM-Pipe/tools/bam-matcher/bam-matcher.py --reference ${FILE_GENOME} --short-output --do-not-cache" + # export parameters export CFG_AUTHOR export CFG_CENTER @@ -632,3 +635,5 @@ export SEQUENZA_UTILS export SEQUENZA_WINDOW export SEQUENZA_NON_MATCHING_NORMAL export SEQUENZA_CHROMOSOMES + +export BAM_MATCHER \ No newline at end of file diff --git a/tools/bam-matcher b/tools/bam-matcher new file mode 160000 index 0000000..cbfd256 --- /dev/null +++ b/tools/bam-matcher @@ -0,0 +1 @@ +Subproject commit cbfd2564ed0df424d61215115ec8d18ec13db317 diff --git a/tools/install.sh b/tools/install.sh index f5f9e39..b0693fd 100755 --- a/tools/install.sh +++ b/tools/install.sh @@ -205,3 +205,51 @@ cd ${DIR_SCRIPT}/msisensor-pro ############ cd /opt/MIRACUM-Pipe/databases agfusion download -g hg38 + +############### +# bam-matcher # +############### +cd ${DIR_SCRIPT}/bam-matcher +pip3 install --upgrade numpy +pip3 install -r requirements.txt + +cat >"${DIR_SCRIPT}"/bam-matcher/bam-matcher.conf < ${DIR_SCRIPT}/bam-matcher/tmp.vcf +mv ${DIR_SCRIPT}/bam-matcher/tmp.vcf ${DIR_SCRIPT}/bam-matcher/1kg.exome.highAF.1511.vcf + +awk '{if($0 !~ /^#/) print "chr"$0; else print $0}' ${DIR_SCRIPT}/bam-matcher/1kg.exome.highAF.3680.vcf > ${DIR_SCRIPT}/bam-matcher/tmp.vcf +mv ${DIR_SCRIPT}/bam-matcher/tmp.vcf ${DIR_SCRIPT}/bam-matcher/1kg.exome.highAF.3680.vcf + +awk '{if($0 !~ /^#/) print "chr"$0; else print $0}' ${DIR_SCRIPT}/bam-matcher/1kg.exome.highAF.7550.vcf > ${DIR_SCRIPT}/bam-matcher/tmp.vcf +mv ${DIR_SCRIPT}/bam-matcher/tmp.vcf ${DIR_SCRIPT}/bam-matcher/1kg.exome.highAF.7550.vcf \ No newline at end of file