-
Notifications
You must be signed in to change notification settings - Fork 3
/
extract_cluster_members.py
executable file
·114 lines (85 loc) · 2.85 KB
/
extract_cluster_members.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/python
from sys import argv,exit
from os import popen, system
import string
def Help():
    """Print the command-line usage message and terminate the script.

    The text goes to standard output. The function then calls ``exit()``
    (``sys.exit`` as imported at the top of the file) with no argument, so
    it never returns to the caller.
    """
    # Single-argument print(...) parses the same way under Python 2 and
    # Python 3, unlike the bare ``print`` statement.
    print('')
    print('Usage: '+argv[0]+' [-hom001] <.info file> <cluster number> <cluster number> ... ')
    print(' Extracts cluster members from specified clusters (with 0 being')
    print(' the number of the first cluster).')
    print(' With -hom001, only the first outfile whose name contains hom001 is extracted.')
    print('')
    exit()
# ---- Command-line handling and loading of the .info file ----

# Remove the optional -hom001 switch BEFORE counting the positional
# arguments, so that an invocation consisting of the switch alone still
# gets the usage text instead of failing on argv[1].
only_hom001 = 0
if '-hom001' in argv:
    argv.remove('-hom001')  # drops the first occurrence only
    only_hom001 = 1

if len(argv) < 2:
    Help()

infile = argv[1]        # path of the .info file
clusternums = argv[2:]  # requested cluster indices, still as strings

# Maximum number of members taken from any one cluster.
NSTRUCT = 99999999

tags = []
tagfilename = {}

# The input is expected to be output from Phil's clustering program, whose
# file name ends in 'info'. This is an explicit check rather than an
# assert so that it is still enforced when Python runs with -O.
if infile[-4:] != 'info':
    exit("%s: expected a file whose name ends in 'info', got %r"
         % (argv[0], infile))

# Keep every line that contains CLUSTER_INFO, in file order. Reading the
# file directly avoids building a shell command from the file name. Each
# kept line is guaranteed to end with a newline, as grep output would.
with open(infile) as info_file:
    lines = [text if text.endswith('\n') else text + '\n'
             for text in info_file
             if 'CLUSTER_INFO' in text]

# NOTE: temp.list is deliberately not opened here; it is opened afresh
# right before it is written, and a handle opened at this point was never
# written to or closed.

taglist = {}      # outfile name -> list of tags to extract from it
whichcenter = {}  # tag -> cluster number (int) it was requested under
whichmember = {}  # tag -> 1-based position of the tag within its cluster
clustercount = 0
# ---- Turn each requested CLUSTER_INFO line into per-outfile tag lists ----
#
# Fills three dictionaries:
#   taglist[outfilename] -> list of tags to extract from that outfile
#   whichcenter[tag]     -> cluster number (int) the tag was requested under
#   whichmember[tag]     -> 1-based position of the tag within its cluster
for clusternum in clusternums:
    cluster_index = int(clusternum)
    try:
        line = lines[cluster_index]
    except IndexError:
        # Report which cluster was missing instead of a bare IndexError.
        exit('Cluster %s requested, but %s has only %d CLUSTER_INFO lines'
             % (clusternum, infile, len(lines)))

    # Text after the last 'Members:' marker, without the line terminator.
    members = line.split('Members:')[-1].rstrip('\r\n')
    # The first comma-separated field is not a member entry and is skipped.
    clustermembers = members.split(',')[1:]

    count = 0
    for clustermember in clustermembers:
        if not clustermember.strip():
            # Tolerate an empty field (e.g. from a trailing comma).
            continue

        # Each entry has the form '<outfile>:<tag>[ ...]'.
        cols = clustermember.split(':')
        outfilename = cols[0]
        fulltag = cols[1].split(' ')[0]

        S_index = fulltag.find('S')
        if S_index > -1:
            # Keep the tag from its first 'S' onward.
            tag = fulltag[S_index:]
        else:
            # Prefix 'F' and drop every 'F' already present in the tag.
            # NOTE(review): unlike the 'S' branch this keeps any text that
            # precedes the 'F' -- confirm that is intended.
            tag = 'F' + fulltag.replace('F', '')

        taglist.setdefault(outfilename, []).append(tag)
        whichcenter[tag] = cluster_index
        count += 1
        whichmember[tag] = count

        if count >= NSTRUCT:
            break

    clustercount += 1
# ---- Run the extraction for each outfile and rename the resulting PDBs ----

outfilenames = list(taglist)
print('OUTFILENAMES:  %s' % (outfilenames,))

if only_hom001:
    # Keep only the first outfile whose name contains 'hom001' (or none).
    outfilenames = [name for name in outfilenames if 'hom001' in name][:1]

for outfilename in outfilenames:
    # List of tags to extract, one per line; the file is closed even if a
    # write fails.
    with open('temp.list', 'w') as fid:
        fid.writelines(tag + '\n' for tag in taglist[outfilename])

    command = '~rhiju/rosetta++/rosetta.gcc -extract -l temp.list -paths ~rhiju/paths.txt -s '+outfilename

    # Look at the first 8 lines of the outfile to choose extra flags.
    # NOTE(review): file names are interpolated into shell commands
    # unquoted, so names containing spaces or shell metacharacters break.
    pipe = popen('head -n 8 '+outfilename)
    header = pipe.readlines()
    pipe.close()

    # More than 10 whitespace-separated columns on the 8th line selects
    # -fa_input; a file shorter than 8 lines simply does not get the flag.
    if len(header) > 7 and len(header[7].split()) > 10:
        command += ' -fa_input'

    # Any lowercase a, c, g or u on the first line switches on the
    # nucleic-acid options.
    first_line = header[0] if header else ''
    if any(base in first_line for base in 'acgu'):
        command += ' -enable_dna -enable_rna '

    print(command)
    system(command)

    # Rename <tag>.pdb to <infile>.cluster<NNN>.<member>.pdb.
    for tag in taglist[outfilename]:
        command = 'mv %s.pdb %s.cluster%03d.%d.pdb' % (tag,infile,whichcenter[tag],whichmember[tag])
        print(command)
        system(command)

    # Remove the scratch tag list before moving on.
    command = 'rm temp.list'
    print(command)
    system(command)