-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdb.py
88 lines (61 loc) · 2.43 KB
/
pdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# -*- coding: utf-8 -*-
"""
Created on Sat Sep 17 13:03:41 2022
@author: huzongxiang
"""
from pathlib import Path
import pickle as pkl
import numpy as np
# Multiplier applied to every raw coordinate before it is written out.
# NOTE(review): presumably a nm -> Angstrom conversion; confirm against the
# code that produced the samples.
unit = 10.0

# Input pickle and output directory, both relative to the working directory.
path = Path("./")
sample_path = path / "samples_2000.pkl"
save_path = path / "samples/"

# Three-letter residue codes, listed in the order of the integer index that
# selects them (index 0 -> 'ALA', ..., index 19 -> 'TRP').
# NOTE(review): 'LYS' precedes 'ILE' and 'TYR' precedes 'TRP', which differs
# from plain one-letter alphabetical order; verify this matches the encoding
# used when the samples were generated.
_RESIDUE_ORDER = (
    'ALA', 'CYS', 'ASP', 'GLU',
    'PHE', 'GLY', 'HIS', 'LYS',
    'ILE', 'LEU', 'MET', 'ASN',
    'PRO', 'GLN', 'ARG', 'SER',
    'THR', 'VAL', 'TYR', 'TRP',
)
residue_names = dict(enumerate(_RESIDUE_ORDER))
# Read the pickled sample batch.
# NOTE(review): unpickling can execute arbitrary code, so the sample file
# must come from a trusted source.
with sample_path.open('rb') as sample_file:
    samples = pkl.load(sample_file)

# "h" is reduced with argmax over its last axis to get one residue index per
# position; "x" supplies coordinates whose columns 0, 1, 2 are read as x, y, z.
protein_residues = samples["h"]
protein_coords = samples["x"]
batch_size = len(protein_residues)

# One entry per sample: residue indices and the scaled x / y / z columns.
protein_residue_indices = [
    np.argmax(protein_residues[k], -1) for k in range(batch_size)
]
protein_x = [protein_coords[k][:, 0] * unit for k in range(batch_size)]
protein_y = [protein_coords[k][:, 1] * unit for k in range(batch_size)]
protein_z = [protein_coords[k][:, 2] * unit for k in range(batch_size)]
def write_atom_line(serial, name, resName, x, y, z, resSeq, element="C", altLoc=" ", chainID="L", iCode=" ", occupancy=1.00, tempFactor=1.00):
    """Format one fixed-width PDB ``ATOM`` record.

    The returned line follows the wwPDB column layout exactly:
    record name (1-6), serial (7-11), atom name (13-16), altLoc (17),
    resName (18-20), chainID (22), resSeq (23-26), iCode (27),
    x / y / z (31-38, 39-46, 47-54), occupancy (55-60),
    tempFactor (61-66) and element (77-78).

    Args:
        serial: Integer atom serial number.
        name: Atom name, e.g. ``"CA"``.
        resName: Residue name, e.g. ``"ALA"``.
        x, y, z: Coordinates, written with three decimals.
        resSeq: Integer residue sequence number.
        element: Element symbol, right-justified in its two columns.
        altLoc: Single-character alternate location indicator.
        chainID: Single-character chain identifier.
        iCode: Single-character insertion code.
        occupancy: Occupancy, written with two decimals.
        tempFactor: Temperature factor, written with two decimals.

    Returns:
        The record as a string terminated by a newline.
    """
    # The element symbol is right-justified in columns 13-14 of the atom-name
    # field, so names of one-letter elements start in column 14 unless the
    # name needs all four columns.
    if len(name) >= 4 or len(element.strip()) > 1:
        atom_field = f"{name:<4s}"
    else:
        atom_field = f" {name:<3s}"

    return (
        f"ATOM  {serial:>5d} {atom_field}{altLoc}{resName:>3s} {chainID}"
        f"{resSeq:>4d}{iCode}   "
        f"{x:>8.3f}{y:>8.3f}{z:>8.3f}"
        f"{occupancy:>6.2f}{tempFactor:>6.2f}"
        f"          {element:>2s}\n"
    )
# Build, for every sample, the list of ATOM lines: one "CA" atom per residue
# position. The position index is used for both the atom serial and the
# residue sequence number, so numbering starts at 0.
protein_pdbs = [
    [
        write_atom_line(
            serial=pos,
            name="CA",
            resName=residue_names[res_idx],
            x=protein_x[n][pos],
            y=protein_y[n][pos],
            z=protein_z[n][pos],
            resSeq=pos,
        )
        for pos, res_idx in enumerate(protein_residue_indices[n])
    ]
    for n in range(batch_size)
]
# Write each sample's lines to <save_path>/sample_<index>.pdb, creating the
# output directory first if it does not exist yet.
save_path.mkdir(exist_ok=True)
for index, atom_lines in enumerate(protein_pdbs):
    target = save_path / f"sample_{index}.pdb"
    with target.open("w") as out:
        out.writelines(atom_lines)