-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
134 lines (123 loc) · 4.7 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
"""
This module assists in the generation and maintenance of a
database containing microbial ecology data from human microbiome
samples.
"""
from datetime import datetime
import sys # for the command-line params
import config
import db
import projects
import management
if __name__ == "__main__":
if len(sys.argv) < 2:
print('No command given. Exiting.')
exit(0)
# only command-line param is how many to do in this session
if sys.argv[1] == 'runs':
TODO = 2000 if len(sys.argv) < 3 else sys.argv[2]
db.find_runs(TODO, per_query=80)
elif sys.argv[1] == 'asvs':
db.find_asv_data(100)
elif sys.argv[1] == 'xml':
if len(sys.argv) < 4:
print('The "xml" command requires two parameters: a taxon ID (e.g. txid408170) and the name of the file.')
exit(1)
db.load_xml(sys.argv[2], sys.argv[3],
save_samples=True, save_tags=False)
elif sys.argv[1] == 'tags':
if len(sys.argv) < 4:
print('The "tags" command requires two parameters: a taxon ID (e.g. txid408170) and the name of the file.')
exit(1)
db.load_xml(sys.argv[2], sys.argv[3],
save_samples=False, save_tags=True)
elif sys.argv[1] == 'runit':
# process a single project
if len(sys.argv) < 3:
print('ERROR: No project ID specified.')
exit(1)
proj = projects.Project(sys.argv[2])
connection = db.Connection()
proj.initialize_pipeline(connection)
proj.RUN(connection)
elif sys.argv[1] == 'discard':
# remove a project
if len(sys.argv) < 3:
print('ERROR: No project ID specified.')
exit(1)
proj = projects.Project(sys.argv[2])
confirm = input(f'Really discard project {sys.argv[2]}? (y/n) ')
if confirm != 'y':
print('User input was not "y"; skipping.')
exit(0)
if len(sys.argv) < 4:
REASON = input('Provide reason for DB: ')
else:
REASON = sys.argv[3]
proj.errors.append(REASON)
connection = db.Connection()
proj.Discard(connection)
elif sys.argv[1] == 'again':
# retry a project
if len(sys.argv) < 3:
print('ERROR: No project ID specified.')
exit(1)
proj = projects.Project(sys.argv[2])
connection = db.Connection()
proj.RUN(connection)
elif sys.argv[1] == 'status':
# check project status
if len(sys.argv) < 3:
print('ERROR: No project ID specified.')
exit(1)
PID = sys.argv[2]
proj = projects.Project(PID)
if proj.check_if_done(): # true if it's complete
proj.Load_results_summary()
proj.print_errors()
else:
proj.Report_progress()
elif sys.argv[1] == 'eval':
if len(sys.argv) < 3:
print('ERROR: No project ID specified.')
exit(1)
PID = sys.argv[2]
proj = projects.Project(PID)
if not proj.check_if_done(): # true if it's complete
proj.Report_progress()
proj.Load_results_summary()
proj.print_errors()
exit(0)
connection = db.Connection()
proj.REACT(connection)
elif sys.argv[1] == 'compendium':
connection = db.Connection()
management.print_compendium_summary(connection)
elif sys.argv[1] == 'summary':
connection = db.Connection()
current = management.determine_projects(connection)
management.print_projects_summary(*current)
elif sys.argv[1] == 'FORWARD':
connection = db.Connection()
current = management.determine_projects(connection)
management.print_projects_summary(*current)
management.advance_projects(*current, connection)
elif sys.argv[1] == 'autoforward':
connection = db.Connection()
# Process the existing projects:
current = management.determine_projects(connection)
management.print_projects_summary(*current)
management.advance_projects(*current, connection, auto=True)
# Trigger new jobs automatically
done, running, not_done = current # just unpacking
TOSTART = config.max_projects-len(running+not_done)
todo = []
if TOSTART > 0:
todo = management.find_todo(connection, needed=TOSTART, max_samples=1000)
now = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
print(f'{now}: {len(running+not_done)} projects running. Starting {len(todo)} additional projects: {todo}')
for pid in todo:
print(f'Launching {pid}')
proj = projects.Project(pid)
proj.initialize_pipeline(connection)
proj.RUN(connection)