configure_user.txt

######################################################################
## provide the user-specific parameters
## ONLY change the value, DO NOT change variable name
######################################################################

## Start editing ##

######################################################################
## global setting ##
######################################################################
OUTPUT_PREFIX = pbmc10k     ## prefix for output file names
isSingleEnd = FALSE         ## set TRUE for single-end data, e.g. scTHS-seq uses single-end sequencing

## annotation files (blacklist, promoters, enhancers, TSS, etc.)
BLACKLIST = annotation/hg38_blacklist.bed
PROMOTERS = annotation/hg38_promoter.bed
ENHANCERS = annotation/hg38_enhancer.bed
TSS = annotation/hg38_tss.bed
CHROM_SIZE_FILE = annotation/chrom_hg38.sizes

GENOME_NAME = hg38   ## used for TF motif enrichment and footprinting analysis
plotEPS = TRUE  ## print figures in eps format when generating the summary report


#####################################
## Adapter trimming ##
#####################################
TRIM_METHOD = trim_galore   ## one of trim_galore (default), Trimmomatic or not specified (means DO NOT trim)

## the adapter sequence file should be specified if Trimmomatic is used for trimming
## you don't need to specify this if your TRIM_METHOD is trim_galore or not specified
## NOTE: the value below is a site-specific absolute path; replace it with the
## location of the adapter file on your own system when using Trimmomatic
ADAPTER_SEQ = /mnt/isilon/tan_lab/yuw1/local_tools/Trimmomatic-0.39/adapters/NexteraPE-PE.fa


#########################################################
## mapping ##
## if bwa is selected, you don't have to specify
## options for bowtie/bowtie2, and vice versa
#########################################################

MAPPING_METHOD = bwa  ## one of bwa/bowtie/bowtie2

## extra mapping options (NO NEED TO SPECIFY INPUT FASTQS AND OUTPUT FILE/DIRECTORY)
BWA_OPTS = -t 16
BWA_INDEX = hg38_genome/hg38_genome.fa

BOWTIE_OPTS = --quiet -p 16 
BOWTIE_INDEX = bowtie-1.2.2/indexes/GRCh38/GRCh38_no_alt

BOWTIE2_OPTS = --end-to-end -p 16
BOWTIE2_INDEX = bowtie2.2.9/indexes/GRCh38/GRCh38

MAPQ = 30  ## filter bam by MAPQ for downstream analysis
CELL_MAP_QC = FALSE  ## output mapping stats for cell barcodes
SHIFT_READS_IN_BAM = FALSE ## if TRUE, shift reads on the + strand by +4bp and on the - strand by -5bp


#################################################################################
## peak calling
#################################################################################

PEAK_CALLER = MACS2  ## one of MACS2, BIN, or COMBINED

## provide extra options for macs2 (NO NEED TO SPECIFY -t, -n, -f here), like

#MACS2_OPTS = -q 0.05 -g hs
## (change to -g mm if you are using mouse data)

## or something like this to skip building the shifting model
MACS2_OPTS = -q 0.05 -g hs --nomodel --extsize 200 --shift -100

BIN_RESL = 5000    ## bin resolution in bp

########################################################
## cell calling
########################################################
CELL_CALLER = FILTER  ## one of EmptyDrop/cellranger/FILTER

EmptyDrop_FDR = 0.001

# set the cutoffs used to define a cell if CELL_CALLER is specified as FILTER
# ignored if CELL_CALLER is set to anything other than FILTER
FILTER_BC_CUTOFF = --min_uniq_frags 2000 --max_uniq_frags 60000 --min_frac_peak 0.5 --min_frac_tss 0.0 --min_frac_promoter 0 --min_frac_enhancer 0.0 --max_frac_mito 0.1 --min_tss_escore 1


##############################################################################
## clustering and dimension reduction
## a seurat object will be created and saved, with embedded information about
## reduced dimensions, normalized data, and
## cluster labels (extract by seurat_obj$active_cluster)
## Note: if the normalization method is tf-idf, the matrix is presumably set as binary
##############################################################################
norm_by = tf-idf ## or log (just log transformation) or NA
Top_Variable_Features = 5000 ## number/fraction of variable features used for seurat
REDUCTION = pca  ## pca/lda, note that UMAP and TSNE will be automatically calculated accordingly
nREDUCTION = 30 ## the number of reduced dimensions
CLUSTERING_METHOD = seurat  ## seurat/cisTopic/kmeans/LSI/SCRAT/chromVAR/scABC
K_CLUSTERS = 0.2  ## the number of clusters (integer) or the resolution parameter (float) for the louvain algorithm (implemented by seurat)
prepCello = TRUE  ## generate object for VisCello (for visualization)

rmDoublets = TRUE ## remove potential doublets or not
exptDoubletRate = 0.03  ## default expected doublet rate, can be overwritten in rmDoublets module

#######################################################################################
## differential accessible analysis
## compare two groups
## if group1 is specified as 'one' and group2 as 'rest', then all
## one-vs-rest comparisons will be conducted
#######################################################################################
RUN_DA = TRUE  ## run differential analysis or not
group1 = 0:1   ## either one or multiple cluster names, separated by colon, or 'one'
group2 = 2     ## cluster name as above or 'rest'
test_use = wilcox  ## one of negbinom, LR, wilcox, t, DESeq2


#######################################################################################
## GO analysis (requires the differential analysis to be done first)
#######################################################################################
RUN_GO = TRUE ## run GO analysis after DA
GO_TYPE = BP  ## one of BP/CC/kegg


#######################################
## split the bam file by cluster
## and output bw and bedgraph files
#######################################
SPLIT_BAM2CLUSTER = TRUE


##################################################################
## TF footprinting analysis ##
## supports the following types of comparisons, e.g.
## group1_fp = 0:1, group2_fp = 2      --- cluster0,1 vs cluster2
## group1_fp = one, group2_fp = rest --- all one vs rest
##################################################################
DO_FOOTPRINT = FALSE

## compare two groups, set in the same way as for the DA analysis
group1_fp = 0:1   ## either a set of cluster names or 'one'
group2_fp = 2     ## cluster name as above or 'rest'
pvalue_fp = 0.05


############################################################################
## cicero cis-interaction ##
###########################################################################
RUN_Cicero = TRUE

## plot interactions within Cicero_Plot_Region in the summary report
## it can be specified as a genomic location, a gene name, or none (skip the plot)
Cicero_Plot_Region = chr5:140610000-140640000 


############################################################################
## about integrate module ##
## integrate two or more samples
## by VFACS, cca (or seurat), rpca, rlsi (or signac), harmony, or pool
############################################################################
Integrate_By = VFACS  ## one of VFACS, cca, rpca, rlsi, pool or harmony

mergePeaksWithin = 0 ## maximum distance between peaks to be merged. Defaults to 0, which means overlapping or bookended features. Note that you can use negative distances to enforce a minimum overlap.

filterPeaksQvalue = 0.05 ## further filter peaks by qvalue before merging them
nDim4Integration = 30 ## reduced dimensions for integrated data
nFeature4Integration = 5000 ## top variable features selected for integration, not used for rlsi
prepCello4Integration = FALSE ## prepare cello for integrated object