From a54e9656b859f459a37049073ba419b068e0cd07 Mon Sep 17 00:00:00 2001
From: hgarrereyn
Date: Fri, 14 May 2021 00:58:10 -0500
Subject: [PATCH] add cpu_gridify option, fix config import, remove openbabel dependency

---
 leadopt/grid_util.py  | 211 +++++++++++++++++++++++++++++++++++++++---
 leadopt/model_conf.py |  38 ++------
 leadopt/util.py       |  71 ++++++++------
 3 files changed, 247 insertions(+), 73 deletions(-)

diff --git a/leadopt/grid_util.py b/leadopt/grid_util.py
index b74a5d5..49be142 100644
--- a/leadopt/grid_util.py
+++ b/leadopt/grid_util.py
@@ -209,17 +209,194 @@ def gpu_gridify(grid, atom_num, atom_coords, atom_mask, layer_offset,
     numba.cuda.atomic.max(grid, idx, val)
 
 
+@numba.jit(nopython=True)
+def cpu_gridify(grid, atom_num, atom_coords, atom_mask, layer_offset,
+                batch_idx, width, res, center, rot,
+                point_radius, point_type, acc_type
+                ):
+    """Adds atoms to the grid on the CPU.
+
+    This is a CPU (numba nopython) port of gpu_gridify. It converts atom
+    coordinate information to 3d voxel information by visiting every grid
+    point in nested loops and, at each point, iterating over the list of
+    atomic coordinates and atom layers to find nearby atoms and add their
+    effect.
+
+    Voxel information is stored in a 5D tensor of type: BxTxNxNxN where:
+        B = batch size
+        T = number of atom types (receptor + ligand)
+        N = grid width (in gridpoints)
+
+    Each invocation of this function will write information to a specific batch
+    index specified by batch_idx. Additionally, the layer_offset parameter can
+    be set to specify a fixed offset to add to each atom_layer item.
+
+    How it works:
+    1. Each gridpoint coordinate is translated to a "real world" coordinate
+       by applying rotation and translation vectors.
+    2. For each gridpoint, the list of atoms is scanned and atoms within a
+       threshold are added to the grid.
+
+    Args:
+        grid: writable numpy ndarray where grid information is stored
+        atom_num: number of atoms
+        atom_coords: array containing (x,y,z) atom coordinates
+        atom_mask: uint32 array of size atom_num containing a destination
+            layer bitmask (i.e. if bit k is set, write atom to index k)
+        layer_offset: a fixed offset added to each atom layer index
+        batch_idx: index specifying which batch to write information to
+        width: number of grid points in each dimension
+        res: distance between neighboring grid points in angstroms
+            (1 == gridpoint every angstrom)
+            (0.5 == gridpoint every half angstrom, i.e. a tighter grid)
+        center: (x,y,z) coordinate of grid center
+        rot: (w,x,y,z) rotation quaternion
+    """
+    # visit every grid point (CPU replacement for the CUDA thread grid)
+    for x in range(width):
+        for y in range(width):
+            for z in range(width):
+
+                # center around origin
+                tx = x - (width/2)
+                ty = y - (width/2)
+                tz = z - (width/2)
+
+                # scale by resolution
+                tx = tx * res
+                ty = ty * res
+                tz = tz * res
+
+                # apply rotation vector
+                aw = rot[0]
+                ax = rot[1]
+                ay = rot[2]
+                az = rot[3]
+
+                bw = 0
+                bx = tx
+                by = ty
+                bz = tz
+
+                # multiply by rotation vector
+                cw = (aw * bw) - (ax * bx) - (ay * by) - (az * bz)
+                cx = (aw * bx) + (ax * bw) + (ay * bz) - (az * by)
+                cy = (aw * by) + (ay * bw) + (az * bx) - (ax * bz)
+                cz = (aw * bz) + (az * bw) + (ax * by) - (ay * bx)
+
+                # multiply by conjugate
+                # dw = (cw * aw) - (cx * (-ax)) - (cy * (-ay)) - (cz * (-az))
+                dx = (cw * (-ax)) + (cx * aw) + (cy * (-az)) - (cz * (-ay))
+                dy = (cw * (-ay)) + (cy * aw) + (cz * (-ax)) - (cx * (-az))
+                dz = (cw * (-az)) + (cz * aw) + (cx * (-ay)) - (cy * (-ax))
+
+                # apply translation vector
+                tx = dx + center[0]
+                ty = dy + center[1]
+                tz = dz + center[2]
+
+                i = 0
+                while i < atom_num:
+                    # fetch atom
+                    fx, fy, fz = atom_coords[i]
+                    mask = atom_mask[i]
+                    i += 1
+
+                    # invisible atoms
+                    if mask == 0:
+                        continue
+
+                    # point radius and squared radius
+                    r = point_radius
+                    r2 = point_radius * point_radius
+
+                    # quick (coarse) cube bounds check
+                    if abs(fx-tx) > r2 or abs(fy-ty) > r2 or abs(fz-tz) > r2:
+                        continue
+
+                    # value to add to this gridpoint
+                    val = 0
+
+                    if point_type == 0: # POINT_TYPE.EXP
+                        # exponential sphere fill
+                        # compute squared distance to atom
+                        d2 = (fx-tx)**2 + (fy-ty)**2 + (fz-tz)**2
+                        if d2 > r2:
+                            continue
+
+                        # compute effect
+                        val = math.exp((-2 * d2) / r2)
+                    elif point_type == 1: # POINT_TYPE.SPHERE
+                        # solid sphere fill
+                        # compute squared distance to atom
+                        d2 = (fx-tx)**2 + (fy-ty)**2 + (fz-tz)**2
+                        if d2 > r2:
+                            continue
+
+                        val = 1
+                    elif point_type == 2: # POINT_TYPE.CUBE
+                        # solid cube fill
+                        val = 1
+                    elif point_type == 3: # POINT_TYPE.GAUSSIAN
+                        # (Ragoza, 2016)
+                        #
+                        # piecewise gaussian sphere fill
+                        # compute squared distance to atom
+                        d2 = (fx-tx)**2 + (fy-ty)**2 + (fz-tz)**2
+                        d = math.sqrt(d2)
+
+                        if d > r * 1.5:
+                            continue
+                        elif d > r:
+                            val = math.exp(-2.0) * ( (4*d2/r2) - (12*d/r) + 9 )
+                        else:
+                            val = math.exp((-2 * d2) / r2)
+                    elif point_type == 4: # POINT_TYPE.LJ
+                        # (Jimenez, 2017) - DeepSite
+                        #
+                        # LJ potential
+                        # compute squared distance to atom
+                        d2 = (fx-tx)**2 + (fy-ty)**2 + (fz-tz)**2
+                        d = math.sqrt(d2)
+
+                        if d > r * 1.5:
+                            continue
+                        else:
+                            val = 1 - math.exp(-((r/d)**12))
+                    elif point_type == 5: # POINT_TYPE.DISCRETE
+                        # nearest-gridpoint
+                        # L1 distance
+                        if abs(fx-tx) < (res/2) and abs(fy-ty) < (res/2) and abs(fz-tz) < (res/2):
+                            val = 1
+
+                    # add value to layers
+                    for k in range(32):
+                        if (mask >> k) & 1:
+                            idx = (batch_idx, layer_offset+k, x, y, z)
+                            if acc_type == 0: # ACC_TYPE.SUM
+                                grid[idx] += val
+                            elif acc_type == 1: # ACC_TYPE.MAX
+                                grid[idx] = max(grid[idx], val)
+
+
 def mol_gridify(grid, atom_coords, atom_mask, layer_offset, batch_idx,
-                width, res, center, rot, point_radius, point_type, acc_type):
+                width, res, center, rot, point_radius, point_type, acc_type,
+                cpu=False):
     """Wrapper around gpu_gridify.
 
     (See gpu_gridify() for details)
     """
-    dw = ((width - 1) // GPU_DIM) + 1
-    gpu_gridify[(dw,dw,dw), (GPU_DIM,GPU_DIM,GPU_DIM)](
-        grid, len(atom_coords), atom_coords, atom_mask, layer_offset,
-        batch_idx, width, res, center, rot, point_radius, point_type, acc_type
-    )
+    if cpu:
+        cpu_gridify(
+            grid, len(atom_coords), atom_coords, atom_mask, layer_offset,
+            batch_idx, width, res, center, rot, point_radius, point_type, acc_type
+        )
+    else:
+        dw = ((width - 1) // GPU_DIM) + 1
+        gpu_gridify[(dw,dw,dw), (GPU_DIM,GPU_DIM,GPU_DIM)](
+            grid, len(atom_coords), atom_coords, atom_mask, layer_offset,
+            batch_idx, width, res, center, rot, point_radius, point_type, acc_type
+        )
 
 
 def make_tensor(shape):
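
For reference, a minimal sketch of driving the new CPU path directly through mol_gridify. The single-atom input, grid size, and identity quaternion below are made-up illustration values, not part of the patch:

    import numpy as np
    from leadopt.grid_util import mol_gridify

    # one atom at the origin, routed to layer 0 (bit 0 set in the mask)
    coords = np.array([[0.0, 0.0, 0.0]], dtype=np.float32)
    mask = np.array([1], dtype=np.uint32)

    # B=1 batch, T=1 layer, N=24 gridpoints per side
    grid = np.zeros((1, 1, 24, 24, 24), dtype=np.float32)

    mol_gridify(
        grid, coords, mask,
        layer_offset=0, batch_idx=0,
        width=24, res=0.5,
        center=np.array([0.0, 0.0, 0.0]),
        rot=np.array([1.0, 0.0, 0.0, 0.0]),  # identity quaternion (w,x,y,z)
        point_radius=1.5, point_type=0, acc_type=1,
        cpu=True,  # dispatch to cpu_gridify instead of launching the CUDA kernel
    )

    # the center voxel maps back onto the atom position, so it holds exp(0) == 1.0
    print(grid[0, 0, 12, 12, 12])

Because cpu_gridify is compiled with numba in nopython mode, the first call pays a one-time JIT compilation cost; subsequent calls run as native code.
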
@@ -371,7 +548,7 @@ def get_batch(data, batch_size=16, batch_set=None, width=48, res=0.5,
 
 def get_raw_batch(r_coords, r_types, p_coords, p_types, rec_typer, lig_typer,
                   conn, num_samples=32, width=24, res=1, fixed_rot=None,
-                  point_radius=1.5, point_type=0, acc_type=0):
+                  point_radius=1.5, point_type=0, acc_type=0, cpu=False):
     """Sample a raw batch with provided atom coordinates.
 
     Args:
@@ -383,14 +560,22 @@ def get_raw_batch(r_coords, r_types, p_coords, p_types, rec_typer, lig_typer,
         num_samples: number of rotations to sample
         width: grid width
         res: grid resolution
-        rec_channels: number of receptor channels
-        parent_channels: number of parent chanels
+        fixed_rot: None or a fixed 4-element rotation vector
+        point_radius: atom radius in Angstroms
+        point_type: shape of the atom densities
+        acc_type: atom density accumulation type
+        cpu: if True, generate batches with cpu_gridify
     """
 
     B = num_samples
     T = rec_typer.size() + lig_typer.size()
     N = width
-    torch_grid, cuda_grid = make_tensor((B,T,N,N,N))
+    if cpu:
+        t = np.zeros((B,T,N,N,N))
+        torch_grid = t
+        cuda_grid = t
+    else:
+        torch_grid, cuda_grid = make_tensor((B,T,N,N,N))
 
     r_mask = np.zeros(len(r_types), dtype=np.uint32)
     p_mask = np.zeros(len(p_types), dtype=np.uint32)
@@ -418,7 +603,8 @@ def get_raw_batch(r_coords, r_types, p_coords, p_types, rec_typer, lig_typer,
             rot=rot,
             point_radius=point_radius,
             point_type=point_type,
-            acc_type=acc_type
+            acc_type=acc_type,
+            cpu=cpu
         )
 
         mol_gridify(
@@ -433,7 +619,8 @@ def get_raw_batch(r_coords, r_types, p_coords, p_types, rec_typer, lig_typer,
             rot=rot,
            point_radius=point_radius,
             point_type=point_type,
-            acc_type=acc_type
+            acc_type=acc_type,
+            cpu=cpu
        )
 
     return torch_grid
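
A usage sketch for the CPU branch of get_raw_batch. The 'simple' typer keys, the placeholder receptor path, and the ligand type tuple are assumptions for illustration; the exact r_types/p_types format depends on the typer objects exposed by leadopt.data_util:

    import numpy as np
    from leadopt.grid_util import get_raw_batch
    from leadopt.data_util import REC_TYPER, LIG_TYPER
    from leadopt.util import load_receptor_ob

    # receptor atoms from the reworked loader (see leadopt/util.py below);
    # 'rec.pdb' is a placeholder path
    r_coords, r_types = load_receptor_ob('rec.pdb')

    # toy parent ligand: a single carbon; tuple format assumed
    p_coords = np.array([[10.0, 12.0, 14.0]])
    p_types = [(6,)]

    # assumed: (x,y,z) connection point used to center the grid
    conn = np.array([10.0, 12.0, 14.0])

    grid = get_raw_batch(
        r_coords, r_types, p_coords, p_types,
        REC_TYPER['simple'], LIG_TYPER['simple'],  # 'simple' keys are assumed
        conn,
        num_samples=4, width=24, res=1.0,
        cpu=True,  # build the grids with cpu_gridify; no CUDA device required
    )
    # grid: numpy array of shape (4, rec_channels + lig_channels, 24, 24, 24)

Note that on the CPU path the returned grid is the plain numpy array allocated inside get_raw_batch rather than a torch tensor backed by GPU memory, so callers that expect a tensor may need torch.from_numpy.
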
diff --git a/leadopt/model_conf.py b/leadopt/model_conf.py
index cc2a4c4..435682a 100644
--- a/leadopt/model_conf.py
+++ b/leadopt/model_conf.py
@@ -19,10 +19,14 @@
 
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-import wandb
 import tqdm
 import numpy as np
 
+try:
+    import wandb
+except ImportError:
+    pass
+
 from leadopt.models.voxel import VoxelFingerprintNet
 from leadopt.data_util import FragmentDataset, SharedFragmentDataset, FingerprintDataset, LIG_TYPER,\
     REC_TYPER
@@ -30,7 +34,7 @@
 from leadopt.metrics import mse, bce, tanimoto, cos, top_k_acc,\
     average_support, inside_support
 
-from config import partitions, moad_partitions
+from config import moad_partitions
 
 
 def get_bios(p):
@@ -309,36 +313,6 @@ def init_models(self):
     def load_data(self):
         print('[*] Loading data...', flush=True)
 
-        # train_dat = FragmentDataset(
-        #     self._args['fragments'],
-        #     rec_typer=REC_TYPER[self._args['rec_typer']],
-        #     lig_typer=LIG_TYPER[self._args['lig_typer']],
-        #     # filter_rec=(
-        #     #     partitions.TRAIN if not self._args['no_partitions'] else None),
-        #     filter_rec=set(get_bios(moad_partitions.TRAIN)),
-        #     filter_smi=set(moad_partitions.TRAIN_SMI),
-        #     fdist_min=self._args['fdist_min'],
-        #     fdist_max=self._args['fdist_max'],
-        #     fmass_min=self._args['fmass_min'],
-        #     fmass_max=self._args['fmass_max'],
-        #     verbose=True
-        # )
-
-        # val_dat = FragmentDataset(
-        #     self._args['fragments'],
-        #     rec_typer=REC_TYPER[self._args['rec_typer']],
-        #     lig_typer=LIG_TYPER[self._args['lig_typer']],
-        #     # filter_rec=(
-        #     #     partitions.VAL if not self._args['no_partitions'] else None),
-        #     filter_rec=set(get_bios(moad_partitions.VAL)),
-        #     filter_smi=set(moad_partitions.VAL_SMI),
-        #     fdist_min=self._args['fdist_min'],
-        #     fdist_max=self._args['fdist_max'],
-        #     fmass_min=self._args['fmass_min'],
-        #     fmass_max=self._args['fmass_max'],
-        #     verbose=True
-        # )
-
         dat = FragmentDataset(
             self._args['fragments'],
             rec_typer=REC_TYPER[self._args['rec_typer']],
diff --git a/leadopt/util.py b/leadopt/util.py
index 9c6fecc..ee4013c 100644
--- a/leadopt/util.py
+++ b/leadopt/util.py
@@ -19,10 +19,10 @@
 
 import numpy as np
 
-try:
-    import pybel
-except:
-    from openbabel import pybel
+# try:
+#     import pybel
+# except:
+#     from openbabel import pybel
 
 from rdkit import Chem
 
@@ -164,32 +164,45 @@ def load_receptor(rec_path):
     return rec
 
 
+# def load_receptor_ob(rec_path):
+#     rec = next(pybel.readfile('pdb', rec_path))
+#     valid = [r for r in rec.residues if r.name != 'HOH']
+
+#     # map partial charge into byte range
+#     def conv_charge(x):
+#         x = max(x,-0.5)
+#         x = min(x,0.5)
+#         x += 0.5
+#         x *= 255
+#         x = int(x)
+#         return x
+
+#     coords = []
+#     types = []
+#     for v in valid:
+#         coords += [k.coords for k in v.atoms]
+#         types += [(
+#             k.atomicnum,
+#             int(k.OBAtom.IsAromatic()),
+#             int(k.OBAtom.IsHbondDonor()),
+#             int(k.OBAtom.IsHbondAcceptor()),
+#             conv_charge(k.OBAtom.GetPartialCharge())
+#         ) for k in v.atoms]
+
+#     return np.array(coords), np.array(types)
+
+
 def load_receptor_ob(rec_path):
-    rec = next(pybel.readfile('pdb', rec_path))
-    valid = [r for r in rec.residues if r.name != 'HOH']
-
-    # map partial charge into byte range
-    def conv_charge(x):
-        x = max(x,-0.5)
-        x = min(x,0.5)
-        x += 0.5
-        x *= 255
-        x = int(x)
-        return x
-
-    coords = []
-    types = []
-    for v in valid:
-        coords += [k.coords for k in v.atoms]
-        types += [(
-            k.atomicnum,
-            int(k.OBAtom.IsAromatic()),
-            int(k.OBAtom.IsHbondDonor()),
-            int(k.OBAtom.IsHbondAcceptor()),
-            conv_charge(k.OBAtom.GetPartialCharge())
-        ) for k in v.atoms]
-
-    return np.array(coords), np.array(types)
+    rec = load_receptor(rec_path)
+
+    coords = get_coords(rec)
+    types = np.array(get_types(rec))
+    types = np.concatenate([
+        types.reshape(-1,1),
+        np.zeros((len(types), 4))
+    ], 1)
+
+    return coords, types
 
 
 def get_connection_point(frag):
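
Finally, a small sketch of the reworked, Open Babel-free loader (the PDB path is a placeholder):

    from leadopt.util import load_receptor_ob

    coords, types = load_receptor_ob('rec.pdb')  # 'rec.pdb' is a placeholder path

    # coords: (num_atoms, 3) array of positions
    # types:  (num_atoms, 5) array; column 0 is the atomic number, and the
    #         aromatic/donor/acceptor/charge columns are zero-filled now that
    #         the Open Babel perception step is gone
    print(coords.shape, types.shape)

Keeping the old function name means existing callers keep working, but code that previously consumed the Open Babel-derived aromaticity, H-bond, and partial-charge fields will now see zeros in those columns.
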