-
Notifications
You must be signed in to change notification settings - Fork 85
/
create_caches.py
executable file
·61 lines (51 loc) · 2.16 KB
/
create_caches.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python3
'''Takes a bunch of types training files. The -c/--col option gives the index of the column
the receptor is in (every following column, up to a '#', is treated as a ligand). Reads in the
gninatypes files specified in these types files and writes out two monolithic receptor and
ligand cache files for use with the recmolcache and ligmolcache molgrid options'''
import os, sys
import struct, argparse
def writemol(root, mol, out):
    '''Append one gninatypes molecule to an open binary cache stream.

    root -- directory prefix for mol; when empty, mol is opened as given
    mol  -- gninatypes file name; also stored in the cache as the record name
    out  -- binary file-like object the record is appended to

    Record layout (as produced by struct in native mode): one signed byte
    holding the name length, the name bytes, an int atom count, then the raw
    gninatypes bytes (16 bytes per atom).

    Failures are printed rather than raised, and a molecule that fails
    validation contributes nothing to out.
    '''
    # An empty root must not turn a relative name into the absolute "/name".
    fname = root + '/' + mol if root else mol
    try:
        with open(fname, 'rb') as gninatype:
            if len(fname) > 255:
                print("Skipping", mol, "since filename is too long")
                return
            data = gninatype.read()
        if len(data) % 16 != 0:
            raise ValueError(
                "size of %s (%d bytes) is not a multiple of 16" % (fname, len(data)))
        name = mol.encode()
        # Build the whole record before touching out, so a failure here
        # cannot leave a truncated record in the cache.
        # 'b' is a signed byte: struct.pack rejects names longer than 127
        # bytes, which is then reported by the handler below.
        record = b''.join((
            struct.pack('b', len(name)),
            name,
            struct.pack('i', len(data) // 16),  # integer atom count
            data,
        ))
        out.write(record)
    except Exception as e:
        # Best effort: report the molecule and carry on with the rest.
        print(mol)
        print(e)
# Command-line interface: the receptor column index, the two output cache
# names, an optional data root, and one or more types files to scan.
parser = argparse.ArgumentParser()
parser.add_argument('-c', '--col', required=True,type=int,help='Column receptor starts on')
parser.add_argument('--recmolcache', default='rec.molcache',type=str,help='Filename of receptor cache')
parser.add_argument('--ligmolcache', default='lig.molcache',type=str,help='Filename of ligand cache')
parser.add_argument('-d','--data_root',type=str,required=False,help="Root folder for relative paths in train/test files",default='')
parser.add_argument('fnames',nargs='+',type=str,help='types files to process')
args = parser.parse_args()

# Names already handed to writemol, so each molecule is attempted only once
# per cache even when it appears on many lines or in several types files.
seenlig = set()
seenrec = set()

# Context managers guarantee both caches and every types file are flushed
# and closed, including when an error interrupts the run.
with open(args.recmolcache, 'wb') as recout, open(args.ligmolcache, 'wb') as ligout:
    for fname in args.fnames:
        with open(fname) as types:
            for line in types:
                vals = line.split()
                if not vals:
                    continue  # blank line (e.g. trailing newline at end of file)
                if len(vals) <= args.col:
                    # Too few columns to contain a receptor; report and move on
                    # instead of aborting the whole run with an IndexError.
                    print("Skipping line without receptor column:", line.rstrip())
                    continue
                rec = vals[args.col]
                ligs = vals[args.col+1:]

                if rec not in seenrec:
                    seenrec.add(rec)
                    writemol(args.data_root, rec, recout)

                for lig in ligs:
                    if lig == '#':
                        break  # the rest of the line is a comment
                    if lig not in seenlig:
                        seenlig.add(lig)
                        writemol(args.data_root, lig, ligout)