diff --git a/SEACR_1.0.sh b/SEACR_1.0.sh index 1ee8af9..f3e59ef 100755 --- a/SEACR_1.0.sh +++ b/SEACR_1.0.sh @@ -7,7 +7,7 @@ then echo " SEACR: Sparse Enrichment Analysis for CUT&RUN - Usage: bash SEACR_1.0.sh .bg [.bg | ] ["norm" | "non"] ["union" | "AUC"] output prefix + Usage: bash SEACR_1.1.sh .bg [.bg | ] ["norm" | "non"] ["relaxed" | "stringent"] output prefix Description of input fields: @@ -17,12 +17,11 @@ then Field 3: “norm” denotes normalization of control to target data, “non” skips this behavior. "norm" is recommended unless experimental and control data are already rigorously normalized to each other (e.g. via spike-in). - Field 4: “union” forces implementation of a maximum signal threshold in addition to the total signal threshold, and corresponds to the “union” mode described in the text, whereas “AUC” avoids this behavior, and corresponds to “AUC only” mode. + Field 4: “relaxed” uses a total signal threshold between the knee and peak of the total signal curve, and corresponds to the “relaxed” mode described in the text, whereas “stringent” uses the peak of the curve, and corresponds to “stringent” mode. Field 5: Output prefix Output file: - .auc.threshold.merge.bed (Bed file of enriched regions) Output data structure: @@ -30,7 +29,6 @@ then Description of output fields: - Field 1: Chromosome Field 2: Start coordinate @@ -44,16 +42,13 @@ then Field 6: Region representing the farthest upstream and farthest downstream bases within the denoted coordinates that are represented by the maximum bedgraph signal Examples: - - bash SEACR_1.0.sh target.bedgraph IgG.bedgraph norm AUC output - Calls enriched regions in target data using normalized IgG control track with AUC threshold + bash SEACR_1.1.sh target.bedgraph IgG.bedgraph norm stringent output + Calls enriched regions in target data using normalized IgG control track with stringent threshold - bash SEACR_1.0.sh target.bedgraph IgG.bedgraph non union output - Calls enriched regions in target data using non-normalized IgG control track with AUC and max signal thresholds - - bash SEACR_1.0.sh target.bedgraph 0.01 non AUC output + bash SEACR_1.1.sh target.bedgraph IgG.bedgraph non relaxed output + Calls enriched regions in target data using non-normalized IgG control track with relaxed threshold + bash SEACR_1.1.sh target.bedgraph 0.01 non stringent output Calls enriched regions in target data by selecting the top 1% of regions by area under the curve (AUC) - " exit 1 fi @@ -90,14 +85,14 @@ fi height=`echo $4` -if [[ $height == "union" ]] +if [[ $height == "relaxed" ]] then - echo "Using peak height in addition to AUC threshold" -elif [[ $height == "AUC" ]] + echo "Using relaxed threshold" +elif [[ $height == "stringent" ]] then - echo "Proceeding without peak height threshold" + echo "Using stringent threshold" else - echo "Must specify \"union\" to include max signal threshold or \"AUC\" for no max signal threshold in fourth input" + echo "Must specify \"stringent\" or \"relaxed\" in fourth input" exit 1 fi @@ -122,36 +117,36 @@ path=`dirname $0` if [[ -f $2 ]] && [[ $norm == "norm" ]] then echo "Calculating threshold using normalized control: $(date)" - Rscript $path/SEACR_1.0.R --exp=$password.auc --ctrl=$password2.auc --norm=yes --output=$password + Rscript $path/SEACR_1.1.R --exp=$password.auc --ctrl=$password2.auc --norm=yes --output=$password elif [[ -f $2 ]] then echo "Calculating threshold using non-normalized control: $(date)" - Rscript $path/SEACR_1.0.R --exp=$password.auc --ctrl=$password2.auc --norm=no --output=$password + Rscript $path/SEACR_1.1.R --exp=$password.auc --ctrl=$password2.auc --norm=no --output=$password else echo "Using user-provided threshold: $(date)" - Rscript $path/SEACR_1.0.R --exp=$password.auc --ctrl=$2 --norm=no --output=$password + Rscript $path/SEACR_1.1.R --exp=$password.auc --ctrl=$2 --norm=no --output=$password fi +fdr=`cat $password.fdr.txt | sed -n '1p'` ## Added 5/15/19 for SEACR_1.1 +fdr2=`cat $password.fdr.txt | sed -n '2p'` ## Added 5/15/19 for SEACR_1.1 + #thresh=`cat $exp.threshold.txt` thresh=`cat $password.threshold.txt | sed -n '1p'` thresh2=`cat $password.threshold.txt | sed -n '2p'` echo "Creating thresholded feature file: $(date)" -if [[ $height == "union" ]] +if [[ $height == "relaxed" ]] then -# awk -v value=$thresh '$4 > value {print $0}' $password.auc.bed | awk -v value2=$thresh2 '$5 < value2 {print $0}' > $password.auc.threshold.bed (Previous behavior) - awk -v value=$thresh -v value2=$thresh2 '$4 > value || $5 > value2 {print $0}' $password.auc.bed > $password.auc.threshold.bed #(Current behavior as of 2/7/19) + echo "Empirical false discovery rate = $fdr2" + awk -v value=$thresh2 '$4 > value {print $0}' $password.auc.bed > $password.auc.threshold.bed else + echo "Empirical false discovery rate = $fdr" awk -v value=$thresh '$4 > value {print $0}' $password.auc.bed > $password.auc.threshold.bed fi if [[ -f $2 ]] then -# if [[ $height == "union" ]] -# then -# awk -v value=$thresh -v value2=$thresh2 '$4 > value || $5 > value2 {print $0}' $password2.auc.bed > $password2.auc.threshold.bed -# else if [[ $norm == "norm" ]] #If normalizing, multiply control bedgraph by normalization constant then constant=`cat $password.norm.txt | sed -n '1p'` @@ -159,7 +154,6 @@ then mv $password2.auc2.bed $password2.auc.bed fi awk -v value=$thresh '$4 > value {print $0}' $password2.auc.bed > $password2.auc.threshold.bed -# fi fi echo "Merging nearby features and eliminating control-enriched features: $(date)" @@ -180,6 +174,7 @@ rm $password.auc.bed rm $password.auc rm $password.threshold.txt rm $password.auc.threshold.bed +rm $password.fdr.txt ## Added 5/15/19 for SEACR_1.1 if [[ -f $2 ]] then rm $password2.auc.bed