Skip to content

Commit

Permalink
add more qc for cell barcodes
Browse files Browse the repository at this point in the history
  • Loading branch information
wbaopaul committed Oct 25, 2019
1 parent 292dee6 commit 0dc7613
Show file tree
Hide file tree
Showing 11 changed files with 386 additions and 155 deletions.
2 changes: 1 addition & 1 deletion scATAC-pro
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ fi

################################ check valid STEPs #####
############################
AVAILABLE_STEP_ARRAY=("demplx_fastq" "trimming" "mapping" "after_mapping" "aggr_signal" "call_peak" "recall_peak" "get_mtx" "qc_per_barcode" "call_cell" "clustering" "motif_analysis" "call_cnv" "runDA" "runGO" "runCicero" "split_bam" "footprint" "report" "process" "process_no_dex" "integrate" "downstream" "all" "integrate_seu")
AVAILABLE_STEP_ARRAY=("demplx_fastq" "trimming" "mapping" "after_mapping" "aggr_signal" "call_peak" "recall_peak" "get_mtx" "qc_per_barcode" "call_cell" "get_bam4Cells" "clustering" "motif_analysis" "call_cnv" "runDA" "runGO" "runCicero" "split_bam" "footprint" "report" "process" "process_no_dex" "integrate" "downstream" "all" "integrate_seu")


check_s=0
Expand Down
10 changes: 10 additions & 0 deletions scripts/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,16 @@ call_cell: config_check
@$(SCRIPTS)/call_cell.sh $(INPUT_FILE) $(CONFIG_FILE) $(CONFIG_SYS)
@echo

#######################################
## extract bam for cell barcodes
#######################################
get_bam4Cells: config_check
@echo "--------------------------------------------"
@date
@echo "Extracting bam for cell barcodes ..."
@$(SCRIPTS)/get_bam4Cells.sh $(INPUT_FILE) $(CONFIG_FILE) $(CONFIG_SYS)
@echo

#######################################
## process
#######################################
Expand Down
92 changes: 92 additions & 0 deletions scripts/cell_mapping_qc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/bin/bash

## output mapping qc results

input_dir=$1 ## the directory where the mapping results are

curr_dir=`dirname $0`
source ${curr_dir}/read_conf.sh
read_conf "$2"
read_conf "$3"

output_dir=${OUTPUT_DIR}/summary
mkdir -p $output_dir

ncore=$(nproc --all)
ncore=$(($ncore - 1))

input_pre=${input_dir}/cell_barcodes
output_pre=${output_dir}/cell_barcodes

${SAMTOOLS_PATH}/samtools flagstat -@ $ncore ${input_pre}.bam > ${output_pre}.flagstat.txt
${SAMTOOLS_PATH}/samtools idxstats -@ $ncore ${input_pre}.bam > ${output_pre}.idxstat.txt

if [[ ! -f ${input_pre}.MAPQ${MAPQ}.bam ]];then
${SAMTOOLS_PATH}/samtools view -@ $ncore -h -b -q ${MAPQ} ${input_pre}.bam > ${input_pre}.MAPQ${MAPQ}.bam
fi

if [[ ! -f ${input_pre}.MAPQ${MAPQ}.bam.bai ]];then
${SAMTOOLS_PATH}/samtools index -@ $ncore ${input_pre}.MAPQ${MAPQ}.bam
fi

${SAMTOOLS_PATH}/samtools flagstat -@ $ncore ${input_pre}.MAPQ${MAPQ}.bam > ${output_pre}.MAPQ${MAPQ}.flagstat.txt
${SAMTOOLS_PATH}/samtools idxstats -@ $ncore ${input_pre}.MAPQ${MAPQ}.bam > ${output_pre}.MAPQ${MAPQ}.idxstat.txt


tmp_sam_file=${output_dir}/tmp.sam
${SAMTOOLS_PATH}/samtools view -@ $ncore -q 5 ${input_pre}.bam > $tmp_sam_file

if [ $MAPPING_METHOD == 'bwa' ]; then
total_uniq_mapped=$( wc -l ${tmp_sam_file} | cut -d ' ' -f1 ) ## number of unique mapped reads
elif [ $MAPPING_METHOD == 'bowtie' ]; then
total_uniq_mapped=$( grep -E "@|NM:" $tmp_sam_file | grep -v "XS:" | wc -l )
else
total_uniq_mapped=$( grep -E "@|NM:" $tmp_sam_file | grep -v "XS:" | wc -l )
fi


total_uniq_mapped=$((${total_uniq_mapped}/2))

total_pairs=$(grep 'paired in' ${output_pre}.flagstat.txt | cut -d ' ' -f1)
total_pairs=$((${total_pairs}/2))
total_pairs_mapped=$(grep 'with itself and mate mapped' ${output_pre}.flagstat.txt | cut -d ' ' -f1)
total_pairs_mapped=$((${total_pairs_mapped}/2))
total_mito_mapped=$(grep chrM ${output_pre}.idxstat.txt | cut -f3)
total_mito_unmapped=$(grep chrM ${output_pre}.idxstat.txt | cut -f4)
total_mito=$((${total_mito_mapped}/2 + ${total_mito_unmapped}/2))
total_mito_mapped=$((${total_mito_mapped}/2))
total_dups=$(grep 'duplicates' ${output_pre}.flagstat.txt | cut -d ' ' -f1)
total_dups=$((${total_dups}/2))



total_pairs_MAPQH=$(grep 'with itself and mate mapped' ${output_pre}.MAPQ${MAPQ}.flagstat.txt | cut -d ' ' -f1)
total_pairs_MAPQH=$((${total_pairs_MAPQH}/2))
total_mito_MAPQH=$(grep chrM ${output_pre}.MAPQ${MAPQ}.idxstat.txt | cut -f3)
total_mito_MAPQH=$((${total_mito_MAPQH}/2))
total_dups_MAPQH=$(grep 'duplicates' ${output_pre}.MAPQ${MAPQ}.flagstat.txt | cut -d ' ' -f1)
total_dups_MAPQH=$((${total_dups_MAPQH}/2))

rm ${output_pre}.idxstat.txt
rm ${output_pre}.flagstat.txt

rm ${output_pre}.MAPQ${MAPQ}.idxstat.txt
rm ${output_pre}.MAPQ${MAPQ}.flagstat.txt

#print to file
echo "Total_Pairs $total_pairs" > ${output_pre}.MappingStats
echo "Total_Pairs_Mapped $total_pairs_mapped" >> ${output_pre}.MappingStats
echo "Total_Uniq_Mapped $total_uniq_mapped" >> ${output_pre}.MappingStats
#echo "Total_Mito $total_mito" >> ${output_pre}.MappingStats
echo "Total_Mito_Mapped $total_mito_mapped" >> ${output_pre}.MappingStats
echo "Total_Dups $total_dups" >> ${output_pre}.MappingStats


echo "Total_Pairs_MAPQ${MAPQ} $total_pairs_MAPQH" >> ${output_pre}.MappingStats
echo "Total_Mito_MAPQ${MAPQ} $total_mito_MAPQH" >> ${output_pre}.MappingStats
echo "Total_Dups_MAPQ${MAPQ} $total_dups_MAPQH" >> ${output_pre}.MappingStats

rm $tmp_sam_file



60 changes: 60 additions & 0 deletions scripts/get_bam4Cells.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/bin/bash

set -e

input_bam=$1

# reading configure file
curr_dir=`dirname $0`
source ${curr_dir}/read_conf.sh
read_conf "$2"
read_conf "$3"

map_dir=${OUTPUT_DIR}/mapping_result
signal_dir=${OUTPUT_DIR}/signal
mkdir -p $map_dir
mkdir -p $signal_dir

ff=${OUTPUT_DIR}/filtered_matrix/${CELL_CALLER}/barcodes.txt ## cell barcodes

curr_dir=`dirname $0`

## extract bam for cell and non-cells
if [[ ! -e "${map_dir}/cell_barcodes.bam" ]]; then
${PERL_PATH}/perl ${curr_dir}/src/extract_bam4Cells.pl --cellbarcode_file $ff --bam_file $input_bam \
--output_dir $map_dir --samtools_path $SAMTOOLS_PATH
echo "The bam file was split between cell and non-cell!"
fi


## QC using cell barcodes bam
if [[ ! -e "${OUTPUT_DIR}/summary/cell_barcodes.MappingStats" ]]; then
echo "generate mapping stats for aggregated cell barcodes file..."
bash ${curr_dir}/cell_mapping_qc.sh $map_dir $2 $3
fi

unset PYTHONHOME
unset PYTHONPATH
ncore=$(nproc --all)
ncore=$((${ncore}/2))
for file0 in $(find $map_dir -name "cell_barcodes.bam")
do
echo "generate bw file..."
pre=$(basename $file0)
pre=${pre/.bam/}
fname_bw=${signal_dir}/${pre}.bw
${SAMTOOLS_PATH}/samtools index -@ $ncore $file0
${DEEPTOOLS_PATH}/bamCoverage --numberOfProcessors max --normalizeUsing BPM \
--bam $file0 --binSize 20 --skipNonCoveredRegions \
--outFileName $fname_bw &
echo "generate count around TSS..."
${DEEPTOOLS_PATH}/computeMatrix reference-point -S $fname_bw -R $TSS \
-a 1200 -b 1200 -o ${signal_dir}/${pre}.aggregated.mtx.gz &
wait
done

rm ${map_dir}/non_cell_barcodes.bam

echo "Aggregated signal was generated for cell barcodes and non-cell barcodes!"


2 changes: 1 addition & 1 deletion scripts/mapping.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ bash ${curr_dir}/mapping_qc.sh ${mapRes_dir} $2 $3

echo "Simple mapping stats summary Done!"

echo "Getting bed file for read pair (fragment) information"
echo "Getting txt file for read pair (fragment) information"
${PERL_PATH}/perl ${curr_dir}/src/simply_bam2frags.pl --read_file ${mapRes_dir}/${OUTPUT_PREFIX}.positionsort.MAPQ${MAPQ}.bam \
--output_file ${qc_dir}/${OUTPUT_PREFIX}.fragments.txt --samtools_path $SAMTOOLS_PATH

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
41 changes: 26 additions & 15 deletions scripts/src/extract_bam4bcs.pl → scripts/src/extract_bam4Cells.pl
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
#Run as follows:
#perl extract_bam4bcs.pl --barcode_file barcode_file --bam_file bam_file --output_prefix output_prefix --samtools_path SAMTOOLS_PATH
#bam file for given barcodes will be saved in output_prefix.bam
#perl extract_bam4bcs.pl --cellbarcode_file cellbarcode_file --bam_file bam_file --output_dir output_dir --samtools_path SAMTOOLS_PATH
#bam file for given barcodes will be saved in output_dir.bam
#
#
#example:
#barcode_file format:
#Barcode
#cellbarcode_file format:
#AAACGAAAGAAATGGG-1
#AAACGAAAGTCTCTAG-1
#AAACGAACAACTAGAA-1
Expand All @@ -18,23 +17,27 @@
#Receive options from command line
use Getopt::Long;

GetOptions( 'barcode_file=s' => \my $barcode_file
GetOptions( 'cellbarcode_file=s' => \my $cellbarcode_file
, 'bam_file=s' => \my $bam_file
, 'output_prefix=s' => \my $output_prefix
, 'output_dir=s' => \my $output_dir
, 'samtools_path=s' => \my $samtools_path
);



open(BARCODE, $barcode_file ) or die("Cannot read $barcode_file \n");
open(BARCODE, $cellbarcode_file ) or die("Cannot read $cellbarcode_file \n");
open(BAM, "$samtools_path/samtools view -h $bam_file |" ) or die("Cannot read $bam_file \n");
open(OUT, ">$output_prefix" ) or die("Cannot write $output_prefix \n");

my $output_sam=$output_dir."/cell_barcodes.sam";
my $output_sam1=$output_dir."/non_cell_barcodes.sam";
open(OUT, ">$output_sam" ) or die("Cannot write $output_sam \n");
open(OUT1, ">$output_sam1" ) or die("Cannot write $output_sam1 \n");

my %cell_barcodes = ();

my $header = <BARCODE>;
#my $header = <BARCODE>;

print("I am reading barcode information file: $barcode_file \n");
print("I am reading barcode information file: $cellbarcode_file \n");

my $pair_counter = 0;
while(my $line = <BARCODE>)
Expand All @@ -46,7 +49,7 @@
$cell_barcodes{$barcode} = 1;
}

print "I have successfully read $pair_counter cell-barcode from $barcode_file \n";
print "I have successfully read $pair_counter cell-barcode from $cellbarcode_file \n";

print "...\n";

Expand All @@ -65,6 +68,7 @@
$bam_line_counter++;
if ($line =~ /^\@/){
print OUT $line; ## pring header
print OUT1 $line; ## pring header
}
chomp $line;
if($bam_line_counter % 1000000 == 0)
Expand All @@ -87,14 +91,21 @@
$matching_line_counter++;
my $output = $line."\n";
print OUT $output;
}else{
my $output = $line."\n";
print OUT1 $output;
}

}#while(<BAM>)

print("I have read $bam_line_counter reads from input BAM file: $bam_file .\n");
print("I have written $matching_line_counter reads into $output_prefix for selected barcodes .\n");
print("I have written $matching_line_counter reads into $output_dir for selected barcodes .\n");

print "convert sam to bam file:\n";
my $output_bam=$output_prefix.".bam";
system("$samtools_path/samtools view -bS $output_prefix > $output_bam ")
system("rm $output_prefix")
my $output_bam=$output_dir."/cell_barcodes.bam";
my $output_bam1=$output_dir."/non_cell_barcodes.bam";
system("$samtools_path/samtools view -@ 4 -bS $output_sam > $output_bam &");
system("$samtools_path/samtools view -@ 4 -bS $output_sam1 > $output_bam1 &");
system("wait");
system("rm $output_sam");
system("rm $output_sam1");
Loading

0 comments on commit 0dc7613

Please sign in to comment.