-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathOptiVac.py
executable file
·249 lines (206 loc) · 10 KB
/
OptiVac.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
#!/usr/bin/env python
# coding=utf-8
"""
###################################################################
Designing String-of-beads with optimal spacers
###################################################################
Authors: Benjamin Schubert and Oliver Kohlbacher
Date: June 2015
Version: 1.0
License: This software is under a three-clause BSD license
Introduction:
-------------
The software is a novel approach to construct epitope-based string-of-beads
vaccines in optimal order and with sequence-optimized spacers of flexible length
such that the recovery of contained epitopes is maximized and immunogenicity of
arising neo-epitopes is reduced.
Requirement:
-------------
Spacer Design uses the following software and libraries:
1) Python 2.7 (https://www.python.org/)
2) Fred2 (https://github.com/FRED-2/Fred2)
3) Cplex >= 12.5 (www.ilog.com)
4) LKH TSP-Approximation >= 2.0.7 (http://www.akira.ruc.dk/~keld/research/LKH/)
Please make sure you have installed said software/libraries
and their dependencies.
Installation:
-------------
First install all required software and libraries. CPLEX/LKH should be globally executable
via command line.
Usage:
-------------
usage: OptiVac.py [-h] -i INPUT -a ALLELES [-k MAX_LENGTH] [-al ALPHA]
[-be BETA] [-cp CLEAVAGE_PREDICTION]
[-ep EPITOPE_PREDICTION] [-thr THRESHOLD] -o OUTPUT
[-t THREADS]
The software is a novel approach to construct epitope-based string-of-beads
vaccines in optimal order and with sequence-optimized spacers of flexible
length such that the recovery of contained epitopes is maximized and
immunogenicity of arising neo-epitopes is reduced.
optional arguments:
-h, --help show this help message and exit
-i INPUT, --input INPUT
File containing epitopes (one peptide per line)
-a ALLELES, --alleles ALLELES
Specifies file containing HLA alleles with
corresponding HLA probabilities (one HLA per line)
-k MAX_LENGTH, --max_length MAX_LENGTH
Specifies the max. length of the spacers (default 6)
-al ALPHA, --alpha ALPHA
Specifies the first-order preference of the user in
the model [0,1] (default 0.99)
-be BETA, --beta BETA
Specifies the second-order preference of the user in
the model [0,1] (default 0).
-cp CLEAVAGE_PREDICTION, --cleavage_prediction CLEAVAGE_PREDICTION
Specifies the used cleavage prediction method (default
PCM) [available: PCM, ProteaSMMConsecutive, ProteaSMMImmuno]
-ep EPITOPE_PREDICTION, --epitope_prediction EPITOPE_PREDICTION
Specifies the used epitope prediction method (default
Syfpeithi) [available: Syfpeithi, BIMAS, SMM, SMMPMBEC]
-thr THRESHOLD, --threshold THRESHOLD
Specifies epitope prediction threshold for SYFPEITHI
(default 20).
-o OUTPUT, --output OUTPUT
Specifies the output file.
-t THREADS, --threads THREADS
Specifies number of threads. If not specified all
available logical cpus are used.
"""
import argparse
import sys
import math
import multiprocessing as mp
import random
from Fred2.IO import FileReader
from Fred2.Core import Allele
from Fred2.Core import Peptide
from Fred2.EpitopePrediction import EpitopePredictorFactory
from Fred2.EpitopeAssembly.EpitopeAssembly import EpitopeAssemblyWithSpacer
from Fred2.CleavagePrediction import CleavageSitePredictorFactory
def generate_alleles(allele_file, generated=None):
"""
generate allele objects from input
"""
result=[]
with open(allele_file, "r") as f:
for l in f:
al,freq = l.replace(","," ").replace(";"," ").replace("\n","").split()
if al.split("HLA-")[-1][0] in ["A","B","C"]:
result.append(Allele(al,prob=float(freq)))
return result
def main():
parser = argparse.ArgumentParser(description="""The software is a novel approach to construct epitope-based string-of-beads
vaccines in optimal order and with sequence-optimized spacers of flexible length
such that the recovery of contained epitopes is maximized and immunogenicity of
arising neo-epitopes is reduced. """)
parser.add_argument("-i", "--input",
required=True,
help="File containing epitopes (one peptide per line)"
)
parser.add_argument("-a", "--alleles",
required=True,
help="Specifies file containing HLA alleles with corresponding HLA probabilities (one HLA per line)"
)
#parameters of the model
parser.add_argument("-k","--max_length",
default=6,
type=int,
help="Specifies the max. length of the spacers (default 6)")
parser.add_argument("-al","--alpha",
default=0.99,
type=float,
help="Specifies the first-order preference of the user in the model [0,1] (default 0.99)")
parser.add_argument("-be","--beta",
default=0.0,
type=float,
help="Specifies the second-order preference of the user in the model [0,1] (default 0).")
parser.add_argument("-cp","--cleavage_prediction",
default="PCM",
help="Specifies the used cleavage prediction method (default PCM) [available: PCM, PROTEASMM_C, PROTEASMM_S]"
)
parser.add_argument("-ep","--epitope_prediction",
default="Syfpeithi",
help="Specifies the used epitope prediction method (default Syfpeithi) [available: Syfpeithi, BIMAS, SMM, SMMPMBEC]"
)
parser.add_argument("-thr","--threshold",
default=20,
type=float,
help="Specifies epitope prediction threshold for SYFPEITHI (default 20).")
parser.add_argument("-o", "--output",
required=True,
help="Specifies the output file.")
parser.add_argument("-t", "--threads",
type=int,
default=None,
help="Specifies number of threads. If not specified all available logical cpus are used.")
parser.add_argument("--ips-solver",
default="cplex",
choices=["cplex", "cbc"],
help="Executable name of the IPS solver. Executable needs to be available in PATH.")
parser.add_argument("--tsp-solution",
default="approximate",
choices=["approximate", "optimal"],
help="Type of solution of the TSP")
parser.add_argument("--random-order",
action="store_true",
help="Indicate whether to generate a random ordered string-of-beads polypeptide")
parser.add_argument("--seed",
type=int,
default=1,
help="Seed for random ordering of string-of-beads polypeptide")
args = parser.parse_args()
#parse input
peptides = list(FileReader.read_lines(args.input, in_type=Peptide))
#read in alleles
alleles = generate_alleles(args.alleles)
if args.cleavage_prediction.upper() not in ["PCM", "PROTEASMM_C", "PROTEASMM_S"]:
print "Specified cleavage predictor is currently not supported. Please choose either PCM, PROTEASMM_C, or PROTEASMM_S"
sys.exit(-1)
if args.epitope_prediction.upper() not in ["SYFPEITHI", "BIMAS", "SMM", "SMMPMBEC"]:
print "Specified cleavage predictor is currently not supported. Please choose either Syfpeithi, BIMAS, SMM, SMMPMBEC"
sys.exit(-1)
#set-up model
cl_pred = CleavageSitePredictorFactory(args.cleavage_prediction)
epi_pred = EpitopePredictorFactory(args.epitope_prediction)
thr = {a.name:args.threshold for a in alleles}
solver = EpitopeAssemblyWithSpacer(peptides,cl_pred,epi_pred,alleles,
k=args.max_length,en=9,threshold=thr,
solver=args.ips_solver, alpha=args.alpha, beta=args.beta,
verbosity=0)
#solve
#pre-processing has to be disable otherwise many solver will destroy the symmetry of the problem
#how to do this is dependent on the solver used. For CPLEX it is preprocessing_presolve=n
threads = mp.cpu_count() if args.threads is None else args.threads
if args.tsp_solution == "approximate":
svbws = solver.approximate(threads=threads,options={"preprocessing_presolve":"n","threads":1})
else:
svbws = solver.solve(threads=threads,options={"preprocessing_presolve":"n","threads":1})
# Generate random ordered string-of-breads, but still uses optimal spacers
# determined from the above solve function.
if args.random_order:
print "Generating a randomly ordered polypeptide"
random.seed(args.seed)
random_order_sob = []
random.shuffle(peptides)
for i in range(len(peptides)):
# Break from loop once we hit the last peptide
if i == len(peptides) - 1:
random_order_sob.extend([Peptide(str(peptides[i]))])
break
left_peptide = str(peptides[i])
right_peptide = str(peptides[i + 1])
opt_spacer = solver.spacer[(left_peptide, right_peptide)]
# Right peptide gets added in the next iteration
random_order_sob.extend([
Peptide(left_peptide),
Peptide(opt_spacer)
])
svbws = random_order_sob
print
print "Resulting String-of-Beads: ","-".join(map(str,svbws))
print
with open(args.output, "w") as f:
f.write("-".join(map(str,svbws)))
if __name__ == "__main__":
main()