-
Notifications
You must be signed in to change notification settings - Fork 3
/
align_based_on_structure.py
executable file
·216 lines (164 loc) · 5.87 KB
/
align_based_on_structure.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
#!/usr/bin/python
from sys import argv
from os.path import exists, expanduser
from os import system
from get_sequence import get_sequence
from math import exp,sqrt
# Directory holding the helper scripts this script shells out to
# (superimpose.py and parse_NMR_models.py are invoked from here).
# NOTE(review): hard-wired to the home directory of user 'rhiju'.
PYDIR = expanduser('~rhiju')+'/python/'
# Stop immediately if the helper-script directory is not present.
assert( exists( PYDIR) )
# First command-line argument: the reference structure file.
file1 = argv[1]
# Remaining command-line arguments: structure files that are each
# superimposed on, and aligned against, file1.
other_files = argv[2:]
def get_dist2( v1, v2 ):
    """Return the squared Euclidean distance between two 3D points.

    Only the first three components of v1 and v2 are read; no square
    root is taken.
    """
    diffs = [ v1[axis] - v2[axis] for axis in ( 0, 1, 2 ) ]
    return diffs[0]*diffs[0] + diffs[1]*diffs[1] + diffs[2]*diffs[2]
def get_score( dist2, cutoff=6.0 ):
    """Return a Gaussian-like similarity score for a squared distance.

    The score is exp( -dist2 / cutoff**2 ): 1.0 at zero distance,
    exp(-1) when the distance equals the cutoff, and decaying towards
    0.0 beyond that.  It is always positive, so matching two residues
    is never penalised relative to leaving them unmatched.

    dist2  -- squared distance between the two points.
    cutoff -- length scale of the decay, in the same units as the
              (unsquared) distance; defaults to 6.0.  Must be non-zero.
    """
    # Force float so an integer cutoff cannot trigger truncating
    # integer division under Python 2.
    cutoff2 = float( cutoff ) * float( cutoff )
    return exp( -1.0 * ( dist2 / cutoff2 ) )
def _read_ca_models( pdb_file ):
    """Read C-alpha coordinates and residue counts from a two-model PDB file.

    pdb_file -- path to a file in which each model is terminated by an
                ENDMDL record.

    Returns ( model_xyzs, totres ):
      model_xyzs -- [ coords_of_model_1, coords_of_model_2 ], each a list
                    of [x, y, z] floats, one per ' CA ' atom line.
      totres     -- residue count per model, in file order; one further
                    entry is appended for whatever follows the last
                    ENDMDL (0 if nothing does).

    A residue is counted whenever the residue-number field (columns
    23-26) differs from that of the previous line of the same model.
    """
    model_xyzs = [ [], [] ]
    totres = []
    model = 0
    rescount = 0
    prev_resnum = ''
    with open( pdb_file ) as fid:
        for line in fid:
            if line[:6] == 'ENDMDL':
                totres.append( rescount )
                model += 1
                rescount = 0
                # Start the next model with a clean slate; otherwise its
                # first residue is missed when it carries the same number
                # as the last residue of the previous model.
                prev_resnum = ''
                # The ENDMDL record itself is not a residue, even when it
                # is padded out to full column width.
                continue
            if len( line ) < 40: continue
            resnum = line[22:26]
            if resnum != prev_resnum:
                rescount += 1
            if line[12:16] == ' CA ':
                model_xyzs[ model ].append( [ float( line[30:38] ),
                                              float( line[38:46] ),
                                              float( line[46:54] ) ] )
            prev_resnum = resnum
    totres.append( rescount )
    return model_xyzs, totres


def _fill_traceback( xyzs1, xyzs2 ):
    """Run the dynamic-programming alignment and return its traceback matrix.

    xyzs1, xyzs2 -- lists of [x, y, z] coordinates, one per residue.

    Returns choice, a ( len(xyzs1)+1 ) x ( len(xyzs2)+1 ) matrix in which
    choice[i][j] is the [i_prev, j_prev] cell that the best-scoring path
    into cell (i, j) came from.  Matching residue i with residue j earns
    get_score() of their squared distance; gaps currently cost nothing.
    """
    n1 = len( xyzs1 )
    n2 = len( xyzs2 )

    # Boundary conditions: zero score along the first row and column.
    DP_score = [ [ 0.0 ] * ( n2 + 1 ) for i in range( n1 + 1 ) ]
    choice = [ [ [] for j in range( n2 + 1 ) ] for i in range( n1 + 1 ) ]
    for i in range( 1, n1 + 1 ):
        choice[i][0] = [ i-1, 0 ]
    for j in range( 1, n2 + 1 ):
        choice[0][j] = [ 0, j-1 ]

    # Fill in matrix, left to right, top to bottom.
    for i in range( 1, n1 + 1 ):
        for j in range( 1, n2 + 1 ):
            # DP indices are 1-based; the coordinate lists are 0-based.
            match = DP_score[i-1][j-1] + get_score( get_dist2( xyzs1[i-1], xyzs2[j-1] ) )
            alternatives = [ [ match, [ i-1, j-1 ] ],
                             [ DP_score[i-1][j], [ i-1, j ] ],    # currently no gap penalty
                             [ DP_score[i][j-1], [ i, j-1 ] ] ]   # currently no gap penalty
            # Highest score wins; ties fall back to comparing the
            # [i_prev, j_prev] lists, exactly as sorting would.
            DP_score[i][j], choice[i][j] = max( alternatives )
    return choice


def _backtrack( choice, seq1, seq2 ):
    """Walk the traceback matrix from the bottom-right corner to the origin.

    choice     -- traceback matrix as produced by _fill_traceback().
    seq1, seq2 -- the two sequences, indexable per residue.

    Returns ( align_seq1, align_seq2, corresponding_pairs ): the two
    gapped sequences in N-to-C order, and the matched [i, j] residue
    pairs (1-based) in the order they are met while backtracking, i.e.
    last pair first.
    """
    i = len( choice ) - 1
    j = len( choice[0] ) - 1
    columns1 = []
    columns2 = []
    corresponding_pairs = []
    while i > 0 or j > 0:
        i_prev, j_prev = choice[i][j][0], choice[i][j][1]
        if i_prev == i-1 and j_prev == j-1:
            columns1.append( seq1[ i-1 ] )
            columns2.append( seq2[ j-1 ] )
            corresponding_pairs.append( [i, j] )
        elif i_prev == i-1:
            columns1.append( seq1[ i-1 ] )
            columns2.append( '-' )
        else:
            columns1.append( '-' )
            columns2.append( seq2[ j-1 ] )
        i = i_prev
        j = j_prev
    # Columns were collected end-to-start; flip them.
    columns1.reverse()
    columns2.reverse()
    return ''.join( columns1 ), ''.join( columns2 ), corresponding_pairs


def _write_pymol_script( pymol_file, file1, file2_sup, corresponding_pairs ):
    """Write a PyMOL script that loads both structures and colors matched residues.

    pymol_file          -- path of the .pml script to create.
    file1               -- reference structure file.
    file2_sup           -- superimposed copy of the second structure.
    corresponding_pairs -- [i, j] residue pairs; each pair gets the same
                           color, cycling through blue, red, green.
    """
    colors = [ 'blue','red','green' ]
    with open( pymol_file, 'w' ) as fid:
        fid.write( 'reinitialize\n')
        fid.write( 'load %s\n' % file1 )
        fid.write( 'load %s\n' % file2_sup )
        fid.write( 'hide everything\n' )
        fid.write( 'show cartoon\n' )
        fid.write( 'set cartoon_rect_length, 0.5 \n' )
        fid.write( 'set cartoon_rect_width, 0.2 \n' )
        fid.write( 'set cartoon_discrete_colors, 1\n' )
        fid.write( 'color white\n')
        fid.write( 'bg_color white\n')
        for n, pair in enumerate( corresponding_pairs ):
            color = colors[ n % len( colors ) ]
            fid.write( 'color %s, %s and resi %d\n' % ( color, file1.replace('.pdb',''), pair[0]) )
            fid.write( 'color %s, %s and resi %d\n' % ( color, file2_sup, pair[1]) )


# Script entry point.  For every additional structure given on the command
# line: superimpose it on file1, align the two chains by C-alpha proximity,
# then write <name>.mapping (gapped sequences), <file2>.sup (superimposed
# coordinates) and <name>.pml (PyMOL script coloring matched residues).
#
# Every print below passes a single pre-formatted string in parentheses, so
# the output is the same whether print is a statement or a function.
if __name__ == '__main__':
    seq1 = get_sequence( file1 )

    for file2 in other_files:
        seq2 = get_sequence( file2 )

        tmp_sup = 'tmp.pdb'
        # NOTE(review): file names are pasted into a shell command unquoted;
        # names containing spaces or shell metacharacters will break this.
        command = PYDIR+'/superimpose.py '+file1+' '+file2+' > '+tmp_sup
        print( command )
        system( command )

        model_xyzs, totres = _read_ca_models( tmp_sup )

        print( 'Lengths:  %s  and  %s' % ( totres[0], totres[1] ) )
        # Every residue must contribute exactly one C-alpha, and the
        # structures must agree with the sequences read from the inputs.
        assert( len( model_xyzs[ 0 ] ) == totres[ 0 ] )
        assert( len( model_xyzs[ 1 ] ) == totres[ 1 ] )
        assert( len( seq1 ) == totres[ 0 ] )
        assert( len( seq2 ) == totres[ 1 ] )

        print( 'Calculating distances ... ' )
        choice = _fill_traceback( model_xyzs[0], model_xyzs[1] )
        align_seq1_new, align_seq2_new, corresponding_pairs = _backtrack( choice, seq1, seq2 )

        print( align_seq1_new )
        print( align_seq2_new )

        if file2.find( '.pdb' ) > 0:
            seqfile = file2.replace('.pdb','.mapping')
        else:
            seqfile = file2 + '.mapping'
        print( 'Outputting this alignment to:  %s' % seqfile )
        with open( seqfile, 'w' ) as fid:
            fid.write( align_seq1_new+'\n' )
            fid.write( align_seq2_new+'\n' )

        pymol_file = seqfile.replace('.mapping','.pml')
        print( 'Display alignment through pymol script:  %s' % pymol_file )

        # Split the superposition into one file per model and keep the
        # second model as the superimposed copy of file2.
        command = PYDIR+'/parse_NMR_models.py '+tmp_sup
        system( command )
        file2_sup = file2+'.sup'
        command = 'mv '+tmp_sup.replace('.pdb','_002.pdb') + ' ' + file2_sup
        system( command )

        _write_pymol_script( pymol_file, file1, file2_sup, corresponding_pairs )

        # NOTE(review): this glob removes every path in the working
        # directory whose name starts with 'tmp', not only files made here.
        command = 'rm -rf tmp*'
        system( command )