-
Notifications
You must be signed in to change notification settings - Fork 3
/
CNNpeaks
executable file
·91 lines (79 loc) · 4.58 KB
/
CNNpeaks
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/python
"""
The comments in these sources were written by a non-native English speaker.
Please bear with any awkward wording.
"""
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import argparse
import sys
import logging
# Module-level defaults. main() declares both names global and overwrites
# them from the -grid/--gridSize and -w/--windowSize command-line options
# (whose argparse defaults are these same values).
num_grid = 12000      # number of grid points per training sample
windowSize = 100000   # window size used for peak calling
def main():
    """Parse the command line and dispatch to the selected run mode.

    The mode-specific modules are imported lazily inside each branch so
    that only the code needed for the chosen mode is loaded.

    Side effects:
        Overwrites the module-level ``num_grid`` and ``windowSize`` with the
        values given by ``-grid`` and ``-w`` (``windowSize`` is multiplied
        by 8 when ``--broad`` is set).

    Raises:
        SystemExit: with status 1 when ``-i`` or ``-m`` is missing, and with
            status 2 (raised by argparse) when an option value is invalid.

    Note:
        ``-s``, ``-eps`` and ``-ln`` are accepted but are not forwarded to
        any mode by this function.
    """
    global num_grid, windowSize

    # Fetch the logger by name rather than relying on a global that only
    # exists when this file is executed as a script; logging.getLogger
    # returns the same object the entry point configures.
    logger = logging.getLogger("ConvLog")

    run_modes = ['preprocess', 'buildModel', 'peakCall', 'checkData',
                 'errorCall', 'randomLabel', 'inspection']

    #################### Setting arguments ########################
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("-m", "--runMode", choices=run_modes,
                            help="Select a mode.")
    arg_parser.add_argument("-i", "--inputDir",
                            help="Input directory including labeled data and bam alignment files."
                                 "\nIn case of peakCall mode, it will be input bam file to call peaks.")
    arg_parser.add_argument("-l", "--labelData", help="Label data for error calling.")
    arg_parser.add_argument("-o", "--outputBed", default=None, help="Output bedfile name.")
    # type=int makes argparse reject non-integer values with a usage error
    # instead of letting a ValueError traceback escape later.
    arg_parser.add_argument("-grid", "--gridSize", type=int, default=12000,
                            help="Define numbers of grid for each training data")
    arg_parser.add_argument("-s", "--searchingDist",
                            help="DBSCAN clustering parameter during preprocessing steps")
    arg_parser.add_argument("-eps", "--basePointEPS",
                            help="DBSCAN clustering parameter during preprocessing steps")
    arg_parser.add_argument("-w", "--windowSize", type=int, default=100000,
                            help="Window size for peak calling.")
    arg_parser.add_argument("-n", "--modelName", default=None,
                            help="Define which model to call peak.")
    arg_parser.add_argument("-kf", "--KFold", type=int, default=10,
                            help="K fold cross validation during training models.")
    arg_parser.add_argument("-ncv", "--notCrossValid", action='store_true')
    arg_parser.add_argument("-ln", "--labelNum", type=int, default=30,
                            help="The number of label for -randomLabel")
    arg_parser.add_argument("-r", "--regions",
                            help="Specific genome regions for peak calling. eg) chr3:100023400-1000044323 , you can use 's' or 'e' to indicate start and end of the chromosome")
    arg_parser.add_argument("--broad", action='store_true',
                            help="Broad call mode to call broad histone marks")
    args = arg_parser.parse_args()

    if args.inputDir is None:
        logger.error("'-i' : Input Directory was missed.")
        sys.exit(1)  # non-zero so callers can detect the usage error
    ###############################################################

    num_grid = args.gridSize
    windowSize = args.windowSize
    if args.broad:
        # Broad mode uses a window eight times larger.
        windowSize *= 8

    if args.runMode == 'preprocess':
        from preProcessing.preProcessing import run as preProcessing
        preProcessing(args.inputDir, logger, num_grid=num_grid)
    elif args.runMode == 'buildModel':
        from buildModel.buildModel import run as buildModel
        buildModel(args.inputDir, logger, num_grid=num_grid,
                   K_fold_in=args.KFold, cross_valid=not args.notCrossValid)
    elif args.runMode == 'peakCall':
        from peakCalling.callPeaks import run as callPeaks
        callPeaks(args.inputDir, logger, window_size=windowSize, num_grid=num_grid,
                  model_name=args.modelName, regions=args.regions,
                  bed_name=args.outputBed)
    elif args.runMode == 'inspection':
        # NOTE(review): the entry point is imported but never invoked, so
        # this mode currently does nothing beyond the import. Confirm the
        # intended call signature before wiring it up.
        from inspection.inspection import run as inspection
    elif args.runMode == 'checkData':
        from utility.labelManager import labelManager
        labelManager(args.inputDir)
    elif args.runMode == 'errorCall':
        from utility.errorCall import run as errorCall
        errorCall(args.inputDir, args.labelData, logger)
    elif args.runMode == 'randomLabel':
        from utility.randomLabel import run as randomLabel
        randomLabel(args.inputDir)
    else:
        # argparse already rejects unknown modes via `choices`, so this
        # branch is only reached when -m was omitted entirely.
        logger.error("-m ( --runMode ) must be one of : { %s }.", ", ".join(run_modes))
        sys.exit(1)
if __name__ == '__main__':
    # Raise the threshold of the 'tensorflow' logger so that only FATAL
    # records from it are emitted.
    logging.getLogger('tensorflow').setLevel(logging.FATAL)

    # Application logger; main() and the mode modules receive this object.
    logger = logging.getLogger("ConvLog")
    logger.setLevel(logging.INFO)  # emit records at INFO level and above

    # Console handler: every record is prefixed with its timestamp.
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(logging.Formatter('%(asctime)s:%(message)s'))
    logger.addHandler(stream_handler)

    try:
        main()
    except KeyboardInterrupt:
        sys.stderr.write("USER INTERRUPT. \n")
        # 130 = 128 + SIGINT, the conventional status for an interrupted
        # process; a bare sys.exit() would report success (status 0).
        sys.exit(130)