diff --git a/scripts/addQC2seurat.sh b/scripts/addQC2seurat.sh deleted file mode 100644 index 89c4ca6..0000000 --- a/scripts/addQC2seurat.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -## reconstruct peak-cell matrix given a union peak file, a fragment.txt file and -## a barcodes.txt file, separated by comma -## the output peak-by-cell matrix will be saved under reConstruct_Matrix/, which -## was under the same directory as barcodes.txt file - -set -e - -inputs=$1 - -inputs=(${inputs//,/ }) -seurat_file=${inputs[0]} -qc_file=${inputs[1]} - -# reading configure file -curr_dir=`dirname $0` -source ${curr_dir}/read_conf.sh -read_conf "$2" -read_conf "$3" - - -${R_PATH}/R --vanilla --args $seurat_file $qc_file < ${curr_dir}/src/addQC2seurat.R - diff --git a/scripts/downstream.sh b/scripts/downstream.sh index 98c2086..aefe83f 100755 --- a/scripts/downstream.sh +++ b/scripts/downstream.sh @@ -13,6 +13,7 @@ read_conf $3 frag_file=${OUTPUT_DIR}/summary/${OUTPUT_PREFIX}.fragments.tsv.gz + ## clustering ${curr_dir}/clustering.sh $input_mtx $2 $3 diff --git a/scripts/integrate_peak.sh b/scripts/integrate_peak.sh index 9981cc4..3b7dc39 100755 --- a/scripts/integrate_peak.sh +++ b/scripts/integrate_peak.sh @@ -50,7 +50,10 @@ do frag0_file=$(find $frag0_dir -name "*fragments*" | grep -v "\.len") mat0_dir=${mat0_dir}/${PEAK_CALLER}/${CELL_CALLER} #bc0_file=$(find ${mat0_dir} -name "*barcodes.txt") - bc0_file=${mat0_dir}/barcodes.txt + bc0_file=${mat0_dir}/barcodes_doubletsRemoved.txt + if [ ! -e "$bc0_file" ]; then + bc0_file=${mat0_dir}/barcodes.txt + fi bash ${curr_dir}/reConstMtx.sh ${feature_file},${frag0_file},${bc0_file} $2 $3 mtx_files=${mtx_files},${mat0_dir}/reConstruct_matrix/matrix.mtx done diff --git a/scripts/process.sh b/scripts/process.sh index 2336b71..39aa215 100755 --- a/scripts/process.sh +++ b/scripts/process.sh @@ -77,12 +77,19 @@ echo "call cell ..." mat_file=${OUTPUT_DIR}/raw_matrix/${PEAK_CALLER}/matrix.mtx ${curr_dir}/call_cell.sh $mat_file $2 $3 -## 8. mapping qc for cell barcodes + +## 8. remove doublets +filtered_mtx_file=${OUTPUT_DIR}/filtered_matrix/${PEAK_CALLER}/${CELL_CALLER}/matrix.rds +${curr_dir}/rmDoublets.sh ${filtered_mtx_file},0.03 $2 $3 + +## 9. mapping qc for cell barcodes map_dir=${OUTPUT_DIR}/mapping_result input_bam=${map_dir}/${OUTPUT_PREFIX}.positionsort.bam -input_bc=${OUTPUT_DIR}/filtered_matrix/${PEAK_CALLER}/${CELL_CALLER}/barcodes.txt +input_bc=${OUTPUT_DIR}/filtered_matrix/${PEAK_CALLER}/${CELL_CALLER}/barcodes_doubletsRemoved.txt ${curr_dir}/get_bam4Cells.sh ${input_bam},${input_bc} $2 $3 -## report preprocessing QC +## 10.report preprocessing QC echo "generating report ..." -${curr_dir}/report.sh $OUTPUT_DIR/summary $2 $3 +${curr_dir}/report.sh ${OUTPUT_DIR}/summary $2 $3 + + diff --git a/scripts/process_no_dex.sh b/scripts/process_no_dex.sh index 823e225..a7a0c0a 100755 --- a/scripts/process_no_dex.sh +++ b/scripts/process_no_dex.sh @@ -75,13 +75,17 @@ echo "call cell ..." mat_file=${OUTPUT_DIR}/raw_matrix/${PEAK_CALLER}/matrix.mtx ${curr_dir}/call_cell.sh $mat_file $2 $3 -## 8. mapping qc for cell barcodes +## 8. remove doublets +filtered_mtx_file=${OUTPUT_DIR}/filtered_matrix/${PEAK_CALLER}/${CELL_CALLER}/matrix.rds +${curr_dir}/rmDoublets.sh ${filtered_mtx_file},0.03 $2 $3 + +## 9. mapping qc for cell barcodes map_dir=${OUTPUT_DIR}/mapping_result input_bam=${map_dir}/${OUTPUT_PREFIX}.positionsort.bam -input_bc=${OUTPUT_DIR}/filtered_matrix/${PEAK_CALLER}/${CELL_CALLER}/barcodes.txt +input_bc=${OUTPUT_DIR}/filtered_matrix/${PEAK_CALLER}/${CELL_CALLER}/barcodes_doubletsRemoved.txt ${curr_dir}/get_bam4Cells.sh ${input_bam},${input_bc} $2 $3 -## report preprocessing QC +## 10.report preprocessing QC echo "generating report ..." -${curr_dir}/report.sh $OUTPUT_DIR/summary $2 $3 +${curr_dir}/report.sh ${OUTPUT_DIR}/summary $2 $3 diff --git a/scripts/process_with_bam.sh b/scripts/process_with_bam.sh index 1c3788c..2bbc9ab 100755 --- a/scripts/process_with_bam.sh +++ b/scripts/process_with_bam.sh @@ -58,14 +58,18 @@ echo "call cell ..." mat_file=${OUTPUT_DIR}/raw_matrix/${PEAK_CALLER}/matrix.mtx ${curr_dir}/call_cell.sh $mat_file $2 $3 -## 6. mapping qc for cell barcodes +## 6. remove doublets +filtered_mtx_file=${OUTPUT_DIR}/filtered_matrix/${PEAK_CALLER}/${CELL_CALLER}/matrix.rds +${curr_dir}/rmDoublets.sh ${filtered_mtx_file},0.03 $2 $3 + +## 7. mapping qc for cell barcodes map_dir=${OUTPUT_DIR}/mapping_result input_bam=${map_dir}/${OUTPUT_PREFIX}.positionsort.bam -input_bc=${OUTPUT_DIR}/filtered_matrix/${PEAK_CALLER}/${CELL_CALLER}/barcodes.txt +input_bc=${OUTPUT_DIR}/filtered_matrix/${PEAK_CALLER}/${CELL_CALLER}/barcodes_doubletsRemoved.txt ${curr_dir}/get_bam4Cells.sh ${input_bam},${input_bc} $2 $3 - -## report preprocessing QC +## 8.report preprocessing QC echo "generating report ..." -${curr_dir}/report.sh $OUTPUT_DIR/summary $2 $3 +${curr_dir}/report.sh ${OUTPUT_DIR}/summary $2 $3 + diff --git a/scripts/src/rmDoublets.R b/scripts/src/rmDoublets.R index f164770..818bc32 100644 --- a/scripts/src/rmDoublets.R +++ b/scripts/src/rmDoublets.R @@ -9,7 +9,7 @@ source_local('dsAnalysis_utilities.R') args = commandArgs(T) inputObj_file = args[1] ## a mtx.rds or a seurat.rds file drate = as.numeric(args[2]) -if(is.null(drate)) drate = 0.4 +if(is.null(drate)) drate = 0.03 input.obj = readRDS(inputObj_file) if(any(class(input.obj) == 'Seurat')) { seurat.obj = input.obj diff --git a/scripts/src/scATAC-pro_report.Rmd b/scripts/src/scATAC-pro_report.Rmd index 2986452..cc9398a 100644 --- a/scripts/src/scATAC-pro_report.Rmd +++ b/scripts/src/scATAC-pro_report.Rmd @@ -123,7 +123,9 @@ if(file.exists(cell_mapping_qc_file)) cell_mapping_qc = fread(cell_mapping_qc_fi bc_stat_file = paste0(params$output_dir, '/summary/', OUTPUT_PREFIX, '.', PEAK_CALLER, '.qc_per_barcode.txt') -selected_bcs = paste0(params$output_dir, '/filtered_matrix/', PEAK_CALLER, '/', CELL_CALLER, '/barcodes.txt') +selected_bcs = paste0(params$output_dir, '/filtered_matrix/', PEAK_CALLER, '/', CELL_CALLER, '/barcodes_doubletsRemoved.txt') +if(!file.exists(selected_bcs)) selected_bcs = paste0(params$output_dir, '/filtered_matrix/', PEAK_CALLER, '/', CELL_CALLER, '/barcodes.txt') + peak_file = paste0(params$output_dir, '/peaks/', PEAK_CALLER, '/', OUTPUT_PREFIX, '_features_BlacklistRemoved.bed') bc_stat = fread(bc_stat_file)