
Commit: DDI and DTI tested; release 0.1.2

kexinhuang12345 committed May 3, 2021
1 parent 8d93b94, commit fc750f7
Showing 9 changed files with 232 additions and 60 deletions.
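This commit wires the DGL graph encoders into the DDI pipeline and renames the AttentiveFP encoder to DGL_AttentiveFP across CompoundPred, DDI, DTI, encoders, and utils. A minimal smoke-test sketch of the renamed encoder, assuming the entry points visible in the diffs below (utils.generate_config and the DDI_Model(**config) constructor); all argument values are illustrative:

```python
from DeepPurpose import utils
from DeepPurpose.DDI import DDI_Model

# GNN-specific keys exposed by generate_config in this release; values illustrative.
config = utils.generate_config(drug_encoding = 'DGL_AttentiveFP',
                               gnn_num_layers = 2,
                               gnn_hid_dim_drug = 64,
                               attentivefp_num_timesteps = 2)

model = DDI_Model(**config)
# model.train(train, val, test) then routes batches through the new
# dgl_collate_func; train/val/test are DataFrames produced by
# DeepPurpose's DDI data processing.
```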
153 changes: 153 additions & 0 deletions DEMO/GNN_Models_Release_Example.ipynb

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions DeepPurpose/CompoundPred.py
@@ -173,8 +173,8 @@ def __init__(self, **config):
self.model_drug = DGL_GIN_AttrMasking(predictor_dim = config['hidden_dim_drug'])
elif drug_encoding == 'DGL_GIN_ContextPred':
self.model_drug = DGL_GIN_ContextPred(predictor_dim = config['hidden_dim_drug'])
-elif drug_encoding == 'AttentiveFP':
-self.model_drug = AttentiveFP(node_feat_size = 39,
+elif drug_encoding == 'DGL_AttentiveFP':
+self.model_drug = DGL_AttentiveFP(node_feat_size = 39,
edge_feat_size = 11,
num_layers = config['gnn_num_layers'],
num_timesteps = config['attentivefp_num_timesteps'],
@@ -203,7 +203,7 @@ def test_(self, data_generator, model, repurposing_mode = False, test = False, v
y_label = []
model.eval()
for i, (v_d, label) in enumerate(data_generator):
-if self.drug_encoding in ["MPNN", 'Transformer', 'DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'AttentiveFP']:
+if self.drug_encoding in ["MPNN", 'Transformer', 'DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'DGL_AttentiveFP']:
v_d = v_d
else:
v_d = v_d.float().to(self.device)
@@ -285,7 +285,7 @@ def train(self, train, val, test = None, verbose = True):
'drop_last': False}
if (self.drug_encoding == "MPNN"):
params['collate_fn'] = mpnn_collate_func
-elif self.drug_encoding in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'AttentiveFP']:
+elif self.drug_encoding in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'DGL_AttentiveFP']:
params['collate_fn'] = dgl_collate_func

training_generator = data.DataLoader(data_process_loader_Property_Prediction(train.index.values,
@@ -307,7 +307,7 @@ def train(self, train, val, test = None, verbose = True):

if (self.drug_encoding == "MPNN"):
params_test['collate_fn'] = mpnn_collate_func
-elif self.drug_encoding in ['DGL_GCN', 'DGL_GAT', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'AttentiveFP']:
+elif self.drug_encoding in ['DGL_GCN', 'DGL_GAT', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'DGL_AttentiveFP']:
params_test['collate_fn'] = dgl_collate_func
testing_generator = data.DataLoader(data_process_loader_Property_Prediction(test.index.values, test.Label.values, test, **self.config), **params_test)

@@ -333,7 +333,7 @@ def train(self, train, val, test = None, verbose = True):
for epo in range(train_epoch):
for i, (v_d, label) in enumerate(training_generator):

-if self.drug_encoding in ["MPNN", 'Transformer', 'DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'AttentiveFP']:
+if self.drug_encoding in ["MPNN", 'Transformer', 'DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'DGL_AttentiveFP']:
v_d = v_d
else:
v_d = v_d.float().to(self.device)
@@ -462,7 +462,7 @@ def predict(self, df_data, verbose = True):

if (self.drug_encoding == "MPNN"):
params['collate_fn'] = mpnn_collate_func
-elif self.drug_encoding in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'AttentiveFP']:
+elif self.drug_encoding in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'DGL_AttentiveFP']:
params['collate_fn'] = dgl_collate_func

generator = data.DataLoader(info, **params)
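The collate dispatch above now repeats the same encoder list in test_, train (twice over), and predict. A hedged refactoring sketch; select_collate_fn and DGL_ENCODINGS are hypothetical helpers, not part of this commit, assumed to sit in the same module as mpnn_collate_func and dgl_collate_func:

```python
# Hypothetical helper: keep the DGL encoder list in one place instead of
# duplicating it at every DataLoader construction site.
DGL_ENCODINGS = {'DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking',
                 'DGL_GIN_ContextPred', 'DGL_AttentiveFP'}

def select_collate_fn(drug_encoding):
    if drug_encoding == 'MPNN':
        return mpnn_collate_func   # assumed in scope, as in this module
    if drug_encoding in DGL_ENCODINGS:
        return dgl_collate_func    # assumed in scope, as in this module
    return None                    # fall back to PyTorch's default_collate

# Usage at each call site:
#     fn = select_collate_fn(self.drug_encoding)
#     if fn is not None:
#         params['collate_fn'] = fn
```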
71 changes: 39 additions & 32 deletions DeepPurpose/DDI.py
@@ -63,35 +63,12 @@ def model_pretrained(path_dir = None, model = None):
model.load_pretrained(path_dir + '/model.pt')
return model

-# def mpnn_feature_collate_func(x):
-# ## first version
-# return [torch.cat([x[j][i] for j in range(len(x))], 0) for i in range(len(x[0]))]

-# def mpnn_feature_collate_func(x):
-# assert len(x[0]) == 5
-# N_atoms_N_bonds = [i[-1] for i in x]
-# N_atoms_scope = []
-# f_a = torch.cat([x[j][0] for j in range(len(x))], 0)
-# f_b = torch.cat([x[j][1] for j in range(len(x))], 0)
-# agraph_lst, bgraph_lst = [], []
-# Na, Nb = 0, 0
-# for j in range(len(x)):
-# agraph_lst.append(x[j][2] + Na)
-# bgraph_lst.append(x[j][3] + Nb)
-# N_atoms_scope.append([Na, x[j][2].shape[0]])
-# Na += x[j][2].shape[0]
-# Nb += x[j][3].shape[0]
-# agraph = torch.cat(agraph_lst, 0)
-# bgraph = torch.cat(bgraph_lst, 0)
-# return [f_a, f_b, agraph, bgraph, N_atoms_scope]

-# def mpnn_collate_func(x):
-# mpnn_feature = [i[0] for i in x]
-# mpnn_feature = mpnn_feature_collate_func(mpnn_feature)
-# from torch.utils.data.dataloader import default_collate
-# x_remain = [[i[1], i[2]] for i in x]
-# x_remain_collated = default_collate(x_remain)
-# return [mpnn_feature] + x_remain_collated
+def dgl_collate_func(x):
+d1, d2, y = zip(*x)
+import dgl
+d1 = dgl.batch(d1)
+d2 = dgl.batch(d2)
+return d1, d2, torch.tensor(y)

class DDI_Model:

@@ -110,6 +87,31 @@ def __init__(self, **config):
self.model_drug = transformer('drug', **config)
elif drug_encoding == 'MPNN':
self.model_drug = MPNN(config['hidden_dim_drug'], config['mpnn_depth'])
+elif drug_encoding == 'DGL_GCN':
+self.model_drug = DGL_GCN(in_feats = 74,
+hidden_feats = [config['gnn_hid_dim_drug']] * config['gnn_num_layers'],
+activation = [config['gnn_activation']] * config['gnn_num_layers'],
+predictor_dim = config['hidden_dim_drug'])
+elif drug_encoding == 'DGL_NeuralFP':
+self.model_drug = DGL_NeuralFP(in_feats = 74,
+hidden_feats = [config['gnn_hid_dim_drug']] * config['gnn_num_layers'],
+max_degree = config['neuralfp_max_degree'],
+activation = [config['gnn_activation']] * config['gnn_num_layers'],
+predictor_hidden_size = config['neuralfp_predictor_hid_dim'],
+predictor_dim = config['hidden_dim_drug'],
+predictor_activation = config['neuralfp_predictor_activation'])
+elif drug_encoding == 'DGL_GIN_AttrMasking':
+self.model_drug = DGL_GIN_AttrMasking(predictor_dim = config['hidden_dim_drug'])
+elif drug_encoding == 'DGL_GIN_ContextPred':
+self.model_drug = DGL_GIN_ContextPred(predictor_dim = config['hidden_dim_drug'])
+elif drug_encoding == 'DGL_AttentiveFP':
+self.model_drug = DGL_AttentiveFP(node_feat_size = 39,
+edge_feat_size = 11,
+num_layers = config['gnn_num_layers'],
+num_timesteps = config['attentivefp_num_timesteps'],
+graph_feat_size = config['gnn_hid_dim_drug'],
+predictor_dim = config['hidden_dim_drug'])

else:
raise AttributeError('Please use one of the available encoding method.')

@@ -132,7 +134,7 @@ def test_(self, data_generator, model, repurposing_mode = False, test = False):
y_label = []
model.eval()
for i, (v_d, v_p, label) in enumerate(data_generator):
-if self.drug_encoding == "MPNN" or self.drug_encoding == 'Transformer':
+if self.drug_encoding in ["MPNN", 'Transformer', 'DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'DGL_AttentiveFP']:
v_d = v_d
v_p = v_p
else:
@@ -206,6 +208,8 @@ def train(self, train, val, test = None, verbose = True):
'drop_last': False}
if (self.drug_encoding == "MPNN"):
params['collate_fn'] = mpnn_collate_func
+elif self.drug_encoding in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'DGL_AttentiveFP']:
+params['collate_fn'] = dgl_collate_func

training_generator = data.DataLoader(data_process_DDI_loader(train.index.values, train.Label.values, train, **self.config), **params)
validation_generator = data.DataLoader(data_process_DDI_loader(val.index.values, val.Label.values, val, **self.config), **params)
@@ -220,6 +224,8 @@ def train(self, train, val, test = None, verbose = True):

if (self.drug_encoding == "MPNN"):
params_test['collate_fn'] = mpnn_collate_func
+elif self.drug_encoding in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'DGL_AttentiveFP']:
+params_test['collate_fn'] = dgl_collate_func
testing_generator = data.DataLoader(data_process_DDI_loader(test.index.values, test.Label.values, test, **self.config), **params_test)

# early stopping
@@ -242,7 +248,7 @@ def train(self, train, val, test = None, verbose = True):
t_start = time()
for epo in range(train_epoch):
for i, (v_d, v_p, label) in enumerate(training_generator):
-if self.drug_encoding == "MPNN" or self.drug_encoding == 'Transformer':
+if self.drug_encoding in ["MPNN", 'Transformer', 'DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'DGL_AttentiveFP']:
v_d = v_d
v_p = v_p
else:
@@ -372,7 +378,8 @@ def predict(self, df_data):

if (self.drug_encoding == "MPNN"):
params['collate_fn'] = mpnn_collate_func

+elif self.drug_encoding in ['DGL_GCN', 'DGL_GAT', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'DGL_AttentiveFP']:
+params['collate_fn'] = dgl_collate_func  # predict() builds `params`; `params_test` is out of scope here

generator = data.DataLoader(info, **params)

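The dgl_collate_func added above batches the two drug graphs of each DDI pair with dgl.batch before they reach the encoder. A standalone sketch of the behavior, assuming dgllife's CanonicalAtomFeaturizer (whose 74-dim atom features match the in_feats = 74 in the new DGL_GCN/DGL_NeuralFP branches); the SMILES pairs and labels are toy values:

```python
import dgl
import torch
from torch.utils.data import DataLoader
from dgllife.utils import smiles_to_bigraph, CanonicalAtomFeaturizer

def dgl_collate_func(x):
    # same shape as the function added above: batch both drugs, stack labels
    d1, d2, y = zip(*x)
    return dgl.batch(d1), dgl.batch(d2), torch.tensor(y)

feat = CanonicalAtomFeaturizer()  # 74-dim atom features -> in_feats = 74
pairs = [('CCO', 'CCN', 1), ('c1ccccc1', 'CCO', 0)]
dataset = [(smiles_to_bigraph(a, node_featurizer = feat),
            smiles_to_bigraph(b, node_featurizer = feat), y)
           for a, b, y in pairs]

loader = DataLoader(dataset, batch_size = 2, collate_fn = dgl_collate_func)
bg1, bg2, labels = next(iter(loader))  # two batched DGLGraphs plus a label tensor
```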
20 changes: 12 additions & 8 deletions DeepPurpose/DTI.py
@@ -212,9 +212,6 @@ def virtual_screening(X_repurpose, target, model, drug_names = None, target_name

def dgl_collate_func(x):
d, p, y = zip(*x)
-print(d)
-print(p)
-print(y)
import dgl
d = dgl.batch(d)
return d, torch.tensor(p), torch.tensor(y)
@@ -256,6 +253,13 @@ def __init__(self, **config):
self.model_drug = DGL_GIN_AttrMasking(predictor_dim = config['hidden_dim_drug'])
elif drug_encoding == 'DGL_GIN_ContextPred':
self.model_drug = DGL_GIN_ContextPred(predictor_dim = config['hidden_dim_drug'])
+elif drug_encoding == 'DGL_AttentiveFP':
+self.model_drug = DGL_AttentiveFP(node_feat_size = 39,
+edge_feat_size = 11,
+num_layers = config['gnn_num_layers'],
+num_timesteps = config['attentivefp_num_timesteps'],
+graph_feat_size = config['gnn_hid_dim_drug'],
+predictor_dim = config['hidden_dim_drug'])
else:
raise AttributeError('Please use one of the available encoding method.')

@@ -297,7 +301,7 @@ def test_(self, data_generator, model, repurposing_mode = False, test = False):
y_label = []
model.eval()
for i, (v_d, v_p, label) in enumerate(data_generator):
-if self.drug_encoding in ["MPNN", 'Transformer', 'DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'AttentiveFP']:
+if self.drug_encoding in ["MPNN", 'Transformer', 'DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'DGL_AttentiveFP']:
v_d = v_d
else:
v_d = v_d.float().to(self.device)
@@ -376,7 +380,7 @@ def train(self, train, val = None, test = None, verbose = True):
'drop_last': False}
if (self.drug_encoding == "MPNN"):
params['collate_fn'] = mpnn_collate_func
-elif self.drug_encoding in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'AttentiveFP']:
+elif self.drug_encoding in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'DGL_AttentiveFP']:
params['collate_fn'] = dgl_collate_func

training_generator = data.DataLoader(data_process_loader(train.index.values, train.Label.values, train, **self.config), **params)
@@ -393,7 +397,7 @@ def train(self, train, val = None, test = None, verbose = True):

if (self.drug_encoding == "MPNN"):
params_test['collate_fn'] = mpnn_collate_func
-elif self.drug_encoding in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'AttentiveFP']:
+elif self.drug_encoding in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'DGL_AttentiveFP']:
params_test['collate_fn'] = dgl_collate_func
testing_generator = data.DataLoader(data_process_loader(test.index.values, test.Label.values, test, **self.config), **params_test)

@@ -423,7 +427,7 @@ def train(self, train, val = None, test = None, verbose = True):
v_p = v_p
else:
v_p = v_p.float().to(self.device)
-if self.drug_encoding in ["MPNN", 'Transformer', 'DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'AttentiveFP']:
+if self.drug_encoding in ["MPNN", 'Transformer', 'DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'DGL_AttentiveFP']:
v_d = v_d
else:
v_d = v_d.float().to(self.device)
@@ -563,7 +567,7 @@ def predict(self, df_data):

if (self.drug_encoding == "MPNN"):
params['collate_fn'] = mpnn_collate_func
-elif self.drug_encoding in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'AttentiveFP']:
+elif self.drug_encoding in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'DGL_AttentiveFP']:
params['collate_fn'] = dgl_collate_func

generator = data.DataLoader(info, **params)
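The hard-coded node_feat_size = 39 and edge_feat_size = 11 in the DGL_AttentiveFP branches match the output widths of dgllife's AttentiveFP featurizers, which utils.py selects for this encoding (see the utils.py hunks below). A quick sanity check, assuming only that dgllife and RDKit are installed:

```python
from dgllife.utils import (smiles_to_bigraph, AttentiveFPAtomFeaturizer,
                           AttentiveFPBondFeaturizer)

node_featurizer = AttentiveFPAtomFeaturizer()
edge_featurizer = AttentiveFPBondFeaturizer(self_loop = True)
g = smiles_to_bigraph('CCO', add_self_loop = True,
                      node_featurizer = node_featurizer,
                      edge_featurizer = edge_featurizer)

print(g.ndata['h'].shape[-1])  # 39 -> node_feat_size
print(g.edata['e'].shape[-1])  # 11 -> edge_feat_size (self-loop flag included)
```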
4 changes: 2 additions & 2 deletions DeepPurpose/encoders.py
@@ -414,10 +414,10 @@ def forward(self, bg):
return self.transform(graph_feats)


-class AttentiveFP(nn.Module):
+class DGL_AttentiveFP(nn.Module):
## adapted from https://github.com/awslabs/dgl-lifesci/blob/2fbf5fd6aca92675b709b6f1c3bc3c6ad5434e96/python/dgllife/model/model_zoo/attentivefp_predictor.py#L17
def __init__(self, node_feat_size, edge_feat_size, num_layers = 2, num_timesteps = 2, graph_feat_size = 200, predictor_dim=None):
-super(AttentiveFP, self).__init__()
+super(DGL_AttentiveFP, self).__init__()
from dgllife.model.gnn import AttentiveFPGNN
from dgllife.model.readout import AttentiveFPReadout

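For orientation, a condensed sketch of what the renamed DGL_AttentiveFP wrapper computes: an AttentiveFPGNN message-passing stack, an AttentiveFPReadout over num_timesteps steps, and a linear projection to predictor_dim (the model's hidden_dim_drug). The dgllife signatures follow its model zoo; the class name and defaults here are illustrative, not the repository's code:

```python
import torch.nn as nn
from dgllife.model.gnn import AttentiveFPGNN
from dgllife.model.readout import AttentiveFPReadout

class AttentiveFPSketch(nn.Module):
    def __init__(self, node_feat_size = 39, edge_feat_size = 11, num_layers = 2,
                 num_timesteps = 2, graph_feat_size = 200, predictor_dim = 256):
        super().__init__()
        self.gnn = AttentiveFPGNN(node_feat_size = node_feat_size,
                                  edge_feat_size = edge_feat_size,
                                  num_layers = num_layers,
                                  graph_feat_size = graph_feat_size)
        self.readout = AttentiveFPReadout(feat_size = graph_feat_size,
                                          num_timesteps = num_timesteps)
        self.transform = nn.Linear(graph_feat_size, predictor_dim)

    def forward(self, bg):
        # per-node embeddings, then attention-based graph readout, then projection
        node_feats = self.gnn(bg, bg.ndata['h'], bg.edata['e'])
        graph_feats = self.readout(bg, node_feats)
        return self.transform(graph_feats)
```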
20 changes: 10 additions & 10 deletions DeepPurpose/utils.py
@@ -397,7 +397,7 @@ def encode_drug(df_data, drug_encoding, column_name = 'SMILES', save_column_name
df_data[save_column_name] = [unique_dict[i] for i in df_data[column_name]]
elif drug_encoding in ['DGL_GCN', 'DGL_NeuralFP']:
df_data[save_column_name] = df_data[column_name]
-elif drug_encoding == 'AttentiveFP':
+elif drug_encoding == 'DGL_AttentiveFP':
df_data[save_column_name] = df_data[column_name]
elif drug_encoding in ['DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred']:
df_data[save_column_name] = df_data[column_name]
@@ -632,7 +632,7 @@ def __init__(self, list_IDs, labels, df, **config):
from functools import partial
self.fc = partial(smiles_to_bigraph, add_self_loop=True)

-elif self.config['drug_encoding'] == 'AttentiveFP':
+elif self.config['drug_encoding'] == 'DGL_AttentiveFP':
from dgllife.utils import smiles_to_bigraph, AttentiveFPAtomFeaturizer, AttentiveFPBondFeaturizer
self.node_featurizer = AttentiveFPAtomFeaturizer()
self.edge_featurizer = AttentiveFPBondFeaturizer(self_loop=True)
@@ -656,7 +656,7 @@ def __getitem__(self, index):
v_d = self.df.iloc[index]['drug_encoding']
if self.config['drug_encoding'] == 'CNN' or self.config['drug_encoding'] == 'CNN_RNN':
v_d = drug_2_embed(v_d)
-elif self.config['drug_encoding'] in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'AttentiveFP']:
+elif self.config['drug_encoding'] in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'DGL_AttentiveFP']:
v_d = self.fc(smiles = v_d, node_featurizer = self.node_featurizer, edge_featurizer = self.edge_featurizer)
v_p = self.df.iloc[index]['target_encoding']
if self.config['target_encoding'] == 'CNN' or self.config['target_encoding'] == 'CNN_RNN':
@@ -681,7 +681,7 @@ def __init__(self, list_IDs, labels, df, **config):
from functools import partial
self.fc = partial(smiles_to_bigraph, add_self_loop=True)

-elif self.config['drug_encoding'] == 'AttentiveFP':
+elif self.config['drug_encoding'] == 'DGL_AttentiveFP':
from dgllife.utils import smiles_to_bigraph, AttentiveFPAtomFeaturizer, AttentiveFPBondFeaturizer
self.node_featurizer = AttentiveFPAtomFeaturizer()
self.edge_featurizer = AttentiveFPBondFeaturizer(self_loop=True)
@@ -704,12 +704,12 @@ def __getitem__(self, index):
v_d = self.df.iloc[index]['drug_encoding_1']
if self.config['drug_encoding'] == 'CNN' or self.config['drug_encoding'] == 'CNN_RNN':
v_d = drug_2_embed(v_d)
-elif self.config['drug_encoding'] in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'AttentiveFP']:
+elif self.config['drug_encoding'] in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'DGL_AttentiveFP']:
v_d = self.fc(smiles = v_d, node_featurizer = self.node_featurizer, edge_featurizer = self.edge_featurizer)
v_p = self.df.iloc[index]['drug_encoding_2']
if self.config['drug_encoding'] == 'CNN' or self.config['drug_encoding'] == 'CNN_RNN':
v_p = drug_2_embed(v_p)
-elif self.config['drug_encoding'] in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'AttentiveFP']:
+elif self.config['drug_encoding'] in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'DGL_AttentiveFP']:
v_p = self.fc(smiles = v_p, node_featurizer = self.node_featurizer, edge_featurizer = self.edge_featurizer)
y = self.labels[index]
return v_d, v_p, y
@@ -756,7 +756,7 @@ def __init__(self, list_IDs, labels, df, **config):
from functools import partial
self.fc = partial(smiles_to_bigraph, add_self_loop=True)

-elif self.config['drug_encoding'] == 'AttentiveFP':
+elif self.config['drug_encoding'] == 'DGL_AttentiveFP':
from dgllife.utils import smiles_to_bigraph, AttentiveFPAtomFeaturizer, AttentiveFPBondFeaturizer
self.node_featurizer = AttentiveFPAtomFeaturizer()
self.edge_featurizer = AttentiveFPBondFeaturizer(self_loop=True)
@@ -780,7 +780,7 @@ def __getitem__(self, index):
v_d = self.df.iloc[index]['drug_encoding']
if self.config['drug_encoding'] == 'CNN' or self.config['drug_encoding'] == 'CNN_RNN':
v_d = drug_2_embed(v_d)
-elif self.config['drug_encoding'] in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'AttentiveFP']:
+elif self.config['drug_encoding'] in ['DGL_GCN', 'DGL_NeuralFP', 'DGL_GIN_AttrMasking', 'DGL_GIN_ContextPred', 'DGL_AttentiveFP']:
v_d = self.fc(smiles = v_d, node_featurizer = self.node_featurizer, edge_featurizer = self.edge_featurizer)
y = self.labels[index]
return v_d, y
@@ -855,7 +855,7 @@ def generate_config(drug_encoding = None, target_encoding = None,
gnn_activation = F.relu,
neuralfp_max_degree = 10,
neuralfp_predictor_hid_dim = 128,
-neuralfp_predictor_activation = F.tanh,
+neuralfp_predictor_activation = torch.tanh,
attentivefp_num_timesteps = 2
):

@@ -938,7 +938,7 @@ def generate_config(drug_encoding = None, target_encoding = None,
elif drug_encoding == 'DGL_GIN_ContextPred':
## loaded pretrained model specifications
pass
-elif drug_encoding == 'AttentiveFP':
+elif drug_encoding == 'DGL_AttentiveFP':
base_config['gnn_hid_dim_drug'] = gnn_hid_dim_drug
base_config['gnn_num_layers'] = gnn_num_layers
base_config['attentivefp_num_timesteps'] = attentivefp_num_timesteps
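The new keyword arguments at the tail of generate_config map one-to-one onto the encoder branches added in DDI.py and DTI.py (note the default activation moves from F.tanh, deprecated in recent PyTorch, to torch.tanh). A hedged example of overriding the DGL_NeuralFP knobs; the values are illustrative:

```python
import torch
import torch.nn.functional as F
from DeepPurpose import utils

config = utils.generate_config(drug_encoding = 'DGL_NeuralFP',
                               gnn_hid_dim_drug = 64,
                               gnn_num_layers = 3,
                               gnn_activation = F.relu,
                               neuralfp_max_degree = 10,
                               neuralfp_predictor_hid_dim = 128,
                               neuralfp_predictor_activation = torch.tanh)
```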
