diff --git a/dgym/collection.py b/dgym/collection.py index 05a1d61..0e7e270 100644 --- a/dgym/collection.py +++ b/dgym/collection.py @@ -2,6 +2,8 @@ # ============================================================================= # IMPORTS # ============================================================================= +import copy +import random import dgym as dg import torch import random @@ -9,6 +11,7 @@ import numpy as np import pandas as pd from rdkit import Chem +from functools import partial from dgym.molecule import Molecule from dgym.reaction import LazyReaction from typing import Union, Iterable, Optional, List, Any, Callable @@ -194,7 +197,6 @@ def __setitem__(self, key: Any, value: Any): def shuffle(self, seed=None): """ Shuffle the collection and return it. """ - import random if seed is not None: random.seed(seed) random.shuffle(self._items) @@ -334,7 +336,6 @@ def unique(self): def copy(self, unique=False): """ Return a copy of self. """ - import copy return self.__class__(copy.deepcopy(self._items)) def view( @@ -357,8 +358,6 @@ def view( torch.utils.data.DataLoader Resulting data loader. """ - from functools import partial - if collate_fn is None: # provide default collate function collate_fn = self._batch @@ -524,7 +523,6 @@ def from_json( From SmilesClickChem: https://zenodo.org/record/4100676 """ - import pandas as pd # load from JSON reactions_df = pd.read_json(path).T.reset_index(drop=True) diff --git a/dgym/datasets.py b/dgym/datasets.py index 3924531..6393b3f 100644 --- a/dgym/datasets.py +++ b/dgym/datasets.py @@ -1,3 +1,4 @@ +import chemfp import pandas as pd from rdkit import Chem from typing import Optional @@ -5,7 +6,6 @@ from rdkit.Chem import PandasTools from dgym.reaction import Reaction -import chemfp chemfp.set_license('20241121-columbia.edu@DAAAPLPPDDKGPECJIJJGFNBEPIIKHOOMFAOG') def fingerprints(path): diff --git a/dgym/envs/oracle.py b/dgym/envs/oracle.py index 90038e7..347f8d1 100644 --- a/dgym/envs/oracle.py +++ b/dgym/envs/oracle.py @@ -17,6 +17,14 @@ from sklearn.preprocessing import normalize from scikit_mol.descriptors import MolecularDescriptorTransformer +import re +import glob +from itertools import islice + +import subprocess +import os +import shutil +import tempfile class OracleCache(dict): def __missing__(self, key): @@ -418,10 +426,6 @@ def _convert_units( def _gather_results(self, directory: str): - - import re - import glob - from itertools import islice scores = [] smiles = [] @@ -451,7 +455,6 @@ def _gather_results(self, directory: str): return smiles, scores def _dock(self, command: str): - import subprocess return subprocess.run( command, shell=True, @@ -480,10 +483,7 @@ def _prepare_command(self, config, directory: str): return ' '.join(['unidock', *inputs]) - def _prepare_ligands(self, molecules, directory: str): - - import os - + def _prepare_ligands(self, molecules, directory: str): failed = [] paths = [] for idx, mol in enumerate(molecules): @@ -548,8 +548,6 @@ def _compute_deltaG(self, energies, temperature=298.15): @contextmanager def _managed_directory(self, dir_path=None): - import shutil - import tempfile is_temp_dir = False if dir_path is None: dir_path = tempfile.mkdtemp() diff --git a/dgym/utils.py b/dgym/utils.py index 2f2a1bc..0bc172a 100644 --- a/dgym/utils.py +++ b/dgym/utils.py @@ -12,6 +12,8 @@ from rdkit import Chem from rdkit.Chem import rdFMCS, AllChem, Draw +import chemfp +chemfp.set_license('20231114-columbia.edu@DAAABLGMDNEEHFALIFOLIONPFHFDJDOLHABF') __all__ = [ 'sort_fingerprints', 'partition_building_blocks', @@ -20,10 +22,6 @@ 'align_lineage' ] -import chemfp -chemfp.set_license('20231114-columbia.edu@DAAABLGMDNEEHFALIFOLIONPFHFDJDOLHABF') - - # Building block processing. # ----------------------------------------------- def compute_fingerprints(sdf_path: str = './', out_path: str = './out'): diff --git a/scripts/screen/adam17/.ipynb_checkpoints/adam17_screen-checkpoint.py b/scripts/screen/adam17/.ipynb_checkpoints/adam17_screen-checkpoint.py index 39183ff..19b9e32 100644 --- a/scripts/screen/adam17/.ipynb_checkpoints/adam17_screen-checkpoint.py +++ b/scripts/screen/adam17/.ipynb_checkpoints/adam17_screen-checkpoint.py @@ -8,6 +8,13 @@ import random import dgym as dg +import torch +import pyarrow.parquet as pq +# Docking oracles +from dgym.envs.oracle import DockingOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction +from dgym.envs.designer import Designer, Generator +import uuid def get_data(path): @@ -25,19 +32,12 @@ def get_data(path): building_blocks = dg.datasets.disk_loader(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630.sdf') fingerprints = dg.datasets.fingerprints(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') - import torch - import pyarrow.parquet as pq table = pq.read_table(f'{path}/sizes.parquet')[0] sizes = torch.tensor(table.to_numpy()) return deck, reactions, building_blocks, fingerprints, sizes def get_oracles(path, sigma=0.1): - - # Docking oracles - from dgym.envs.oracle import DockingOracle, NoisyOracle - from dgym.envs.utility import ClassicUtilityFunction - config = { 'center_x': 44.294, 'center_y': 28.123, @@ -116,7 +116,6 @@ def get_molecules(deck, batch_size): sizes ) = get_data(path) -from dgym.envs.designer import Designer, Generator designer = Designer( Generator(building_blocks, fingerprints, sizes), @@ -147,7 +146,6 @@ def get_molecules(deck, batch_size): batch_size = 300 # Check if file already exists -import uuid file_path = f'./out/adam17_random_batch_{uuid.uuid4()}.tsv' for _ in range(10): diff --git a/scripts/screen/adam17/adam17_screen.py b/scripts/screen/adam17/adam17_screen.py index 39183ff..22c6752 100644 --- a/scripts/screen/adam17/adam17_screen.py +++ b/scripts/screen/adam17/adam17_screen.py @@ -8,6 +8,14 @@ import random import dgym as dg +import torch +import pyarrow.parquet as pq + +from dgym.envs.oracle import DockingOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction + +from dgym.envs.designer import Designer, Generator +import uuid def get_data(path): @@ -25,8 +33,6 @@ def get_data(path): building_blocks = dg.datasets.disk_loader(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630.sdf') fingerprints = dg.datasets.fingerprints(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') - import torch - import pyarrow.parquet as pq table = pq.read_table(f'{path}/sizes.parquet')[0] sizes = torch.tensor(table.to_numpy()) @@ -35,9 +41,6 @@ def get_data(path): def get_oracles(path, sigma=0.1): # Docking oracles - from dgym.envs.oracle import DockingOracle, NoisyOracle - from dgym.envs.utility import ClassicUtilityFunction - config = { 'center_x': 44.294, 'center_y': 28.123, @@ -116,7 +119,6 @@ def get_molecules(deck, batch_size): sizes ) = get_data(path) -from dgym.envs.designer import Designer, Generator designer = Designer( Generator(building_blocks, fingerprints, sizes), @@ -147,7 +149,6 @@ def get_molecules(deck, batch_size): batch_size = 300 # Check if file already exists -import uuid file_path = f'./out/adam17_random_batch_{uuid.uuid4()}.tsv' for _ in range(10): diff --git a/scripts/screen/dockstring/.ipynb_checkpoints/adam17_screen-checkpoint.py b/scripts/screen/dockstring/.ipynb_checkpoints/adam17_screen-checkpoint.py index 39183ff..47e5dbd 100644 --- a/scripts/screen/dockstring/.ipynb_checkpoints/adam17_screen-checkpoint.py +++ b/scripts/screen/dockstring/.ipynb_checkpoints/adam17_screen-checkpoint.py @@ -8,6 +8,13 @@ import random import dgym as dg +import torch +import pyarrow.parquet as pq +# Docking oracles +from dgym.envs.oracle import DockingOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction +from dgym.envs.designer import Designer, Generator +import uuid def get_data(path): @@ -25,8 +32,6 @@ def get_data(path): building_blocks = dg.datasets.disk_loader(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630.sdf') fingerprints = dg.datasets.fingerprints(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') - import torch - import pyarrow.parquet as pq table = pq.read_table(f'{path}/sizes.parquet')[0] sizes = torch.tensor(table.to_numpy()) @@ -34,10 +39,6 @@ def get_data(path): def get_oracles(path, sigma=0.1): - # Docking oracles - from dgym.envs.oracle import DockingOracle, NoisyOracle - from dgym.envs.utility import ClassicUtilityFunction - config = { 'center_x': 44.294, 'center_y': 28.123, @@ -116,8 +117,6 @@ def get_molecules(deck, batch_size): sizes ) = get_data(path) -from dgym.envs.designer import Designer, Generator - designer = Designer( Generator(building_blocks, fingerprints, sizes), reactions, @@ -147,7 +146,6 @@ def get_molecules(deck, batch_size): batch_size = 300 # Check if file already exists -import uuid file_path = f'./out/adam17_random_batch_{uuid.uuid4()}.tsv' for _ in range(10): diff --git a/scripts/screen/dockstring/dockstring_screen.py b/scripts/screen/dockstring/dockstring_screen.py index e0ed5e1..ec78ace 100644 --- a/scripts/screen/dockstring/dockstring_screen.py +++ b/scripts/screen/dockstring/dockstring_screen.py @@ -1,5 +1,17 @@ import argparse import dgym as dg +import torch +import pyarrow.parquet as pq +from dgym.envs.designer import Designer, Generator +# select first molecule +import random +import os +from dgym.envs.oracle import DockingOracle +import pandas as pd +from dgym.molecule import Molecule +from dgym.envs.designer import Designer, Generator +import os + def get_data(path): @@ -19,8 +31,6 @@ def get_data(path): fingerprints = dg.datasets.fingerprints( f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') - import torch - import pyarrow.parquet as pq table = pq.read_table(f'{path}/sizes.parquet')[0] sizes = torch.tensor(table.to_numpy()) @@ -34,17 +44,13 @@ def get_molecules( fingerprints, sizes, ): - - from dgym.envs.designer import Designer, Generator - + designer = Designer( Generator(building_blocks, fingerprints, sizes), reactions, cache = True ) - # select first molecule - import random def select_molecule(deck): initial_index = random.randint(0, len(deck)) @@ -75,7 +81,6 @@ def select_molecule(deck): def get_docking_config(path, target_index, scorer): - import os dockstring_dir = f'{path}/dockstring_targets/' files = os.listdir(dockstring_dir) @@ -107,7 +112,6 @@ def get_docking_config(path, target_index, scorer): def get_oracle(path: str, target_index: int, scorer: str): - from dgym.envs.oracle import DockingOracle # Create noiseless evaluators name, receptor_path, config = get_docking_config(path, target_index, scorer=scorer) @@ -124,9 +128,6 @@ def get_oracle(path: str, target_index: int, scorer: str): args = parser.parse_args() # Run experiment -import pandas as pd -from dgym.molecule import Molecule -from dgym.envs.designer import Designer, Generator # Load all data path = '../../../../dgym-data' @@ -163,7 +164,6 @@ def get_oracle(path: str, target_index: int, scorer: str): results_df['scorer'] = args.scorer # Write to disk -import os file_path = f'{args.out_dir}/screen_targets_{args.target_index}_{args.scorer}.tsv' results_df.to_csv( diff --git a/scripts/screen/mpo/.ipynb_checkpoints/adam17_screen-checkpoint.py b/scripts/screen/mpo/.ipynb_checkpoints/adam17_screen-checkpoint.py index 39183ff..fa0f0b7 100644 --- a/scripts/screen/mpo/.ipynb_checkpoints/adam17_screen-checkpoint.py +++ b/scripts/screen/mpo/.ipynb_checkpoints/adam17_screen-checkpoint.py @@ -8,6 +8,15 @@ import random import dgym as dg +import torch +import pyarrow.parquet as pq +# Docking oracles +from dgym.envs.oracle import DockingOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction +from dgym.envs.designer import Designer, Generator +import uuid + + def get_data(path): @@ -25,8 +34,7 @@ def get_data(path): building_blocks = dg.datasets.disk_loader(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630.sdf') fingerprints = dg.datasets.fingerprints(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') - import torch - import pyarrow.parquet as pq + table = pq.read_table(f'{path}/sizes.parquet')[0] sizes = torch.tensor(table.to_numpy()) @@ -34,10 +42,6 @@ def get_data(path): def get_oracles(path, sigma=0.1): - # Docking oracles - from dgym.envs.oracle import DockingOracle, NoisyOracle - from dgym.envs.utility import ClassicUtilityFunction - config = { 'center_x': 44.294, 'center_y': 28.123, @@ -116,7 +120,6 @@ def get_molecules(deck, batch_size): sizes ) = get_data(path) -from dgym.envs.designer import Designer, Generator designer = Designer( Generator(building_blocks, fingerprints, sizes), @@ -147,7 +150,6 @@ def get_molecules(deck, batch_size): batch_size = 300 # Check if file already exists -import uuid file_path = f'./out/adam17_random_batch_{uuid.uuid4()}.tsv' for _ in range(10): diff --git a/scripts/screen/mpo/mpo_screen.py b/scripts/screen/mpo/mpo_screen.py index dcc27a4..7564d66 100644 --- a/scripts/screen/mpo/mpo_screen.py +++ b/scripts/screen/mpo/mpo_screen.py @@ -1,5 +1,14 @@ import argparse import dgym as dg +import torch +import pyarrow.parquet as pq +from dgym.envs.designer import Designer, Generator +import random +import os +from dgym.envs.oracle import DockingOracle, CatBoostOracle, RDKitOracle +import pandas as pd +import os + def get_data(path): @@ -19,8 +28,7 @@ def get_data(path): fingerprints = dg.datasets.fingerprints( f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') - import torch - import pyarrow.parquet as pq + table = pq.read_table(f'{path}/sizes.parquet')[0] sizes = torch.tensor(table.to_numpy()) @@ -35,7 +43,7 @@ def get_molecules( sizes, ): - from dgym.envs.designer import Designer, Generator + designer = Designer( Generator(building_blocks, fingerprints, sizes), @@ -44,7 +52,7 @@ def get_molecules( ) # select first molecule - import random + def select_molecule(deck): initial_index = random.randint(0, len(deck) - 1) @@ -75,7 +83,7 @@ def select_molecule(deck): def get_docking_config(path, target_index): - import os + dockstring_dir = f'{path}/dockstring_targets/' files = os.listdir(dockstring_dir) @@ -107,7 +115,7 @@ def get_docking_config(path, target_index): def get_oracles(path: str, target_index: int): - from dgym.envs.oracle import DockingOracle, CatBoostOracle, RDKitOracle + # Create noiseless evaluators name, receptor_path, config = get_docking_config(path, target_index) @@ -126,7 +134,7 @@ def get_oracles(path: str, target_index: int): args = parser.parse_args() # Run experiment -import pandas as pd + # dg.envs.utility.MultipleUtilityFunction # Load all data @@ -175,7 +183,6 @@ def get_oracles(path: str, target_index: int): ) # Write to disk -import os file_path = f'{args.out_dir}/screen_mpo.tsv' results_df.to_csv( diff --git a/scripts/selection/batch_size/.ipynb_checkpoints/selection_noise-checkpoint.py b/scripts/selection/batch_size/.ipynb_checkpoints/selection_noise-checkpoint.py index b071706..e4247e6 100644 --- a/scripts/selection/batch_size/.ipynb_checkpoints/selection_noise-checkpoint.py +++ b/scripts/selection/batch_size/.ipynb_checkpoints/selection_noise-checkpoint.py @@ -1,6 +1,22 @@ import argparse import dgym as dg +# Docking oracles +from dgym.envs.oracle import DockingOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction + +import pandas as pd +from dgym.molecule import Molecule +from dgym.envs.designer import Designer, Generator +from dgym.envs.drug_env import DrugEnv +from dgym.agents import SequentialDrugAgent +from dgym.agents.exploration import EpsilonGreedy +from dgym.experiment import Experiment + +import json +import uuid +from utils import serialize_with_class_names + # load all data path = '../../../../dgym-data' @@ -18,9 +34,6 @@ building_blocks = dg.datasets.disk_loader(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630.sdf') fingerprints = dg.datasets.fingerprints(f'{path}/out/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') -# Docking oracles -from dgym.envs.oracle import DockingOracle, NoisyOracle -from dgym.envs.utility import ClassicUtilityFunction config = { 'center_x': 44.294, @@ -63,13 +76,7 @@ acceptable=(7.125, 9.5) ) -import pandas as pd -from dgym.molecule import Molecule -from dgym.envs.designer import Designer, Generator -from dgym.envs.drug_env import DrugEnv -from dgym.agents import SequentialDrugAgent -from dgym.agents.exploration import EpsilonGreedy -from dgym.experiment import Experiment + designer = Designer( Generator(building_blocks, fingerprints), @@ -113,9 +120,6 @@ result = experiment.run(**vars(args)) # Export results -import json -import uuid -from utils import serialize_with_class_names file_path = f'{args.out_dir}/selection_noise_{uuid.uuid4()}.json' result_serialized = serialize_with_class_names(result) diff --git a/scripts/selection/batch_size/.ipynb_checkpoints/selection_temperature-checkpoint.py b/scripts/selection/batch_size/.ipynb_checkpoints/selection_temperature-checkpoint.py index a9d6544..3b39a42 100644 --- a/scripts/selection/batch_size/.ipynb_checkpoints/selection_temperature-checkpoint.py +++ b/scripts/selection/batch_size/.ipynb_checkpoints/selection_temperature-checkpoint.py @@ -3,6 +3,30 @@ import dgym as dg import pandas as pd +import torch +import pyarrow.parquet as pq + +import random +import os + +from dgym.envs.oracle import \ + DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction +from dgym.envs.utility import ( + ClassicUtilityFunction, MultipleUtilityFunction +) + +from copy import deepcopy +from dgym.envs.designer import Designer, Generator +from dgym.envs import DrugEnv +from dgym.agents import SequentialDrugAgent +from dgym.agents.exploration import EpsilonGreedy +from dgym.experiment import Experiment + +import json +import uuid +from utils import serialize_with_class_names + def get_data(path): deck = dg.MoleculeCollection.load( @@ -21,8 +45,7 @@ def get_data(path): fingerprints = dg.datasets.fingerprints( f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') - import torch - import pyarrow.parquet as pq + table = pq.read_table(f'{path}/sizes.parquet')[0] sizes = torch.tensor(table.to_numpy()) @@ -31,7 +54,6 @@ def get_data(path): def get_initial_library(deck, designer): # select first molecule - import random def _select_molecule(deck): initial_index = random.randint(0, len(deck) - 1) initial_molecule = deck[initial_index] @@ -48,7 +70,6 @@ def _select_molecule(deck): def get_docking_config(path: str, target_index: int): - import os dockstring_dir = f'{path}/dockstring_targets/' files = os.listdir(dockstring_dir) @@ -78,9 +99,7 @@ def get_docking_config(path: str, target_index: int): def get_oracles(path: str, target_index: int): - from dgym.envs.oracle import \ - DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle - from dgym.envs.utility import ClassicUtilityFunction + target, config = get_docking_config(path, target_index) @@ -101,9 +120,7 @@ def get_multiple_utility_functions( log_S_oracle, sigma=1.0 ): - from dgym.envs.utility import ( - ClassicUtilityFunction, MultipleUtilityFunction - ) + # Define utility functions pIC50_utility = ClassicUtilityFunction( @@ -124,7 +141,6 @@ def get_multiple_utility_functions( ] # Environment tolerates acceptable ADMET - from copy import deepcopy utility_agent = MultipleUtilityFunction( utility_functions = [pIC50_utility, log_P_utility, log_S_utility], weights = [0.8, 0.1, 0.1] @@ -174,7 +190,6 @@ def get_agent_sequence(temperature: float): ) = get_data(path) # Get starting library -from dgym.envs.designer import Designer, Generator designer = Designer( Generator(building_blocks, fingerprints, sizes), reactions, cache = True) library = get_initial_library(deck, designer) @@ -201,7 +216,6 @@ def get_agent_sequence(temperature: float): ) # Create DrugEnv -from dgym.envs import DrugEnv drug_env = DrugEnv( designer = designer, library = library, @@ -210,8 +224,7 @@ def get_agent_sequence(temperature: float): ) # Create DrugAgent -from dgym.agents import SequentialDrugAgent -from dgym.agents.exploration import EpsilonGreedy + sequence = get_agent_sequence(temperature = args.temperature) drug_agent = SequentialDrugAgent( sequence = sequence, @@ -220,16 +233,12 @@ def get_agent_sequence(temperature: float): ) # Create and run Experiment -from dgym.experiment import Experiment experiment = Experiment( drug_agent=drug_agent, drug_env=drug_env) file_path = f'{args.out_dir}/selection_temperature_{args.temperature}_{uuid.uuid4()}.json' result = experiment.run(**vars(args), out=file_path)[0] # Export results -import json -import uuid -from utils import serialize_with_class_names result_serialized = serialize_with_class_names(result) json.dump(result_serialized, open(file_path, 'w')) \ No newline at end of file diff --git a/scripts/selection/batch_size/selection_batch_size.py b/scripts/selection/batch_size/selection_batch_size.py index 5239c9f..afdc416 100644 --- a/scripts/selection/batch_size/selection_batch_size.py +++ b/scripts/selection/batch_size/selection_batch_size.py @@ -1,6 +1,28 @@ import uuid import argparse import dgym as dg +import torch +import pyarrow.parquet as pq +import random +import os +from dgym.envs.oracle import \ + DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction +from dgym.envs.utility import ( + ClassicUtilityFunction, Policy +) +from copy import deepcopy +from dgym.envs.designer import Designer, Generator +import json +from dgym.envs import DrugEnv +from dgym.agents import SequentialDrugAgent +from dgym.agents.exploration import EpsilonGreedy +from dgym.experiment import Experiment +import json +import uuid +from utils import serialize_with_class_names + + def get_data(path): @@ -20,8 +42,6 @@ def get_data(path): fingerprints = dg.datasets.fingerprints( f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') - import torch - import pyarrow.parquet as pq table = pq.read_table(f'{path}/sizes.parquet')[0] sizes = torch.tensor(table.to_numpy()) @@ -30,7 +50,6 @@ def get_data(path): def get_initial_library(deck, designer): # select first molecule - import random def _select_molecule(deck): initial_index = random.randint(0, len(deck) - 1) initial_molecule = deck[initial_index] @@ -47,7 +66,6 @@ def _select_molecule(deck): def get_docking_config(path: str, target_index: int): - import os dockstring_dir = f'{path}/dockstring_targets/' files = os.listdir(dockstring_dir) @@ -76,10 +94,6 @@ def get_docking_config(path: str, target_index: int): return target, config def get_oracles(path: str, target_index: int): - - from dgym.envs.oracle import \ - DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle - from dgym.envs.utility import ClassicUtilityFunction target, config = get_docking_config(path, target_index) @@ -100,9 +114,7 @@ def get_multiple_utility_functions( log_S_oracle, sigma=1.0 ): - from dgym.envs.utility import ( - ClassicUtilityFunction, Policy - ) + # Define utility functions pIC50_utility = ClassicUtilityFunction( @@ -123,7 +135,6 @@ def get_multiple_utility_functions( ] # Environment tolerates acceptable ADMET - from copy import deepcopy utility_agent = Policy( utility_functions = [pIC50_utility, log_P_utility, log_S_utility], weights = [0.8, 0.1, 0.1] @@ -185,7 +196,6 @@ def get_agent_sequence(batch_size: int = 24, score_ratio: int = 5): print('Loaded data.', flush=True) # Get starting library -from dgym.envs.designer import Designer, Generator designer = Designer( Generator(building_blocks, fingerprints, sizes), reactions, cache = True) library = get_initial_library(deck, designer) @@ -205,7 +215,6 @@ def get_agent_sequence(batch_size: int = 24, score_ratio: int = 5): print('Loaded oracles.', flush=True) # Load experiment state off disk if available -import json try: with open(args.experiment_state_path, 'r') as f: experiment_state = json.load(f) @@ -229,7 +238,6 @@ def get_agent_sequence(batch_size: int = 24, score_ratio: int = 5): print('Loaded utility functions.', flush=True) # Create DrugEnv -from dgym.envs import DrugEnv drug_env = DrugEnv( designer = designer, library = library, @@ -240,8 +248,6 @@ def get_agent_sequence(batch_size: int = 24, score_ratio: int = 5): print('Loaded DrugEnv.', flush=True) # Create DrugAgent -from dgym.agents import SequentialDrugAgent -from dgym.agents.exploration import EpsilonGreedy sequence = get_agent_sequence(batch_size=args.batch_size, score_ratio=args.score_ratio) drug_agent = SequentialDrugAgent( sequence = sequence, @@ -251,16 +257,12 @@ def get_agent_sequence(batch_size: int = 24, score_ratio: int = 5): print('Loaded DrugAgent.', flush=True) # Create and run Experiment -from dgym.experiment import Experiment experiment = Experiment(drug_agent=drug_agent, drug_env=drug_env).load(experiment_state) file_path = args.experiment_state_path \ or f'{args.out_dir}/selection_batch_size_{args.batch_size}_score_ratio_{args.score_ratio}_{uuid.uuid4()}.json' result = experiment.run(**vars(args), out=file_path)[0] # Export results -import json -import uuid -from utils import serialize_with_class_names result_serialized = serialize_with_class_names(result) with open(file_path, 'w') as f: diff --git a/scripts/selection/exploration/.ipynb_checkpoints/selection_batch_size-checkpoint.py b/scripts/selection/exploration/.ipynb_checkpoints/selection_batch_size-checkpoint.py index 51e8792..18603a2 100644 --- a/scripts/selection/exploration/.ipynb_checkpoints/selection_batch_size-checkpoint.py +++ b/scripts/selection/exploration/.ipynb_checkpoints/selection_batch_size-checkpoint.py @@ -1,6 +1,26 @@ import argparse import dgym as dg +import torch +import pyarrow.parquet as pq + +# Docking oracles +from dgym.envs.oracle import DockingOracle +from dgym.envs.utility import ClassicUtilityFunction + +import pandas as pd +from dgym.molecule import Molecule +from dgym.envs.designer import Designer, Generator +from dgym.envs.drug_env import DrugEnv +from dgym.agents import SequentialDrugAgent +from dgym.agents.exploration import EpsilonGreedy +from dgym.experiment import Experiment +import random +import pdb +import json +import uuid +from utils import serialize_with_class_names + # load all data path = '../../../../dgym-data' @@ -18,14 +38,10 @@ building_blocks = dg.datasets.disk_loader(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630.sdf') fingerprints = dg.datasets.fingerprints(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') -import torch -import pyarrow.parquet as pq table = pq.read_table(f'{path}/sizes.parquet')[0] sizes = torch.tensor(table.to_numpy()) -# Docking oracles -from dgym.envs.oracle import DockingOracle -from dgym.envs.utility import ClassicUtilityFunction + config = { 'center_x': 44.294, @@ -52,13 +68,7 @@ acceptable=(7.125, 9.5) ) -import pandas as pd -from dgym.molecule import Molecule -from dgym.envs.designer import Designer, Generator -from dgym.envs.drug_env import DrugEnv -from dgym.agents import SequentialDrugAgent -from dgym.agents.exploration import EpsilonGreedy -from dgym.experiment import Experiment + designer = Designer( Generator(building_blocks, fingerprints, sizes), @@ -66,7 +76,7 @@ cache = True ) -import random + initial_index = random.randint(0, len(deck)) initial_library = dg.MoleculeCollection([deck[initial_index]]) # 659 initial_library.update_annotations() @@ -108,12 +118,10 @@ try: result = experiment.run(**vars(args)) except: - import pdb; pdb.set_trace() + pdb.set_trace() # Export results -import json -import uuid -from utils import serialize_with_class_names + file_path = f'{args.out_dir}/selection_batch_size_{uuid.uuid4()}.json' result_serialized = serialize_with_class_names(result) diff --git a/scripts/selection/exploration/selection_exploration.py b/scripts/selection/exploration/selection_exploration.py index b69d9bc..4ff0723 100644 --- a/scripts/selection/exploration/selection_exploration.py +++ b/scripts/selection/exploration/selection_exploration.py @@ -1,6 +1,27 @@ import argparse import dgym as dg +import torch +import pyarrow.parquet as pq + +# Docking oracles +from dgym.envs.oracle import DockingOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction + +import pandas as pd +from dgym.molecule import Molecule +from dgym.envs.designer import Designer, Generator +from dgym.envs.drug_env import DrugEnv +import random + +from dgym.agents import SequentialDrugAgent +from dgym.agents.exploration import EpsilonGreedy +from dgym.experiment import Experiment + +import json +import uuid +from utils import serialize_with_class_names + def get_data(path): deck = dg.MoleculeCollection.load( @@ -17,8 +38,7 @@ def get_data(path): building_blocks = dg.datasets.disk_loader(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630.sdf') fingerprints = dg.datasets.fingerprints(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') - import torch - import pyarrow.parquet as pq + table = pq.read_table(f'{path}/sizes.parquet')[0] sizes = torch.tensor(table.to_numpy()) @@ -26,9 +46,7 @@ def get_data(path): def get_oracles(path, sigma=0.5): - # Docking oracles - from dgym.envs.oracle import DockingOracle, NoisyOracle - from dgym.envs.utility import ClassicUtilityFunction + config = { 'center_x': 44.294, @@ -72,10 +90,6 @@ def get_drug_env( docking_utility ): - import pandas as pd - from dgym.molecule import Molecule - from dgym.envs.designer import Designer, Generator - from dgym.envs.drug_env import DrugEnv designer = Designer( Generator(building_blocks, fingerprints, sizes), @@ -84,7 +98,7 @@ def get_drug_env( ) # select first molecule - import random + def select_molecule(deck): initial_index = random.randint(0, len(deck) - 1) initial_molecule = deck[initial_index] @@ -110,8 +124,7 @@ def select_molecule(deck): def get_drug_agent(docking_utility, args): - from dgym.agents import SequentialDrugAgent - from dgym.agents.exploration import EpsilonGreedy + # Run the experiment sequence = [ @@ -162,14 +175,11 @@ def get_drug_agent(docking_utility, args): drug_agent = get_drug_agent(noisy_docking_utility, args) # Run experiment -from dgym.experiment import Experiment + experiment = Experiment(drug_agent, drug_env) result = experiment.run(**vars(args)) # Export results -import json -import uuid -from utils import serialize_with_class_names file_path = f'{args.out_dir}/selection_epsilon_{uuid.uuid4()}.json' result_serialized = serialize_with_class_names(result) diff --git a/scripts/selection/max_noise/.ipynb_checkpoints/selection_noise-checkpoint.py b/scripts/selection/max_noise/.ipynb_checkpoints/selection_noise-checkpoint.py index b071706..3d9022c 100644 --- a/scripts/selection/max_noise/.ipynb_checkpoints/selection_noise-checkpoint.py +++ b/scripts/selection/max_noise/.ipynb_checkpoints/selection_noise-checkpoint.py @@ -1,6 +1,21 @@ import argparse import dgym as dg +from dgym.envs.oracle import DockingOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction + +import pandas as pd +from dgym.molecule import Molecule +from dgym.envs.designer import Designer, Generator +from dgym.envs.drug_env import DrugEnv +from dgym.agents import SequentialDrugAgent +from dgym.agents.exploration import EpsilonGreedy +from dgym.experiment import Experiment + +import json +import uuid +from utils import serialize_with_class_names + # load all data path = '../../../../dgym-data' @@ -19,8 +34,6 @@ fingerprints = dg.datasets.fingerprints(f'{path}/out/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') # Docking oracles -from dgym.envs.oracle import DockingOracle, NoisyOracle -from dgym.envs.utility import ClassicUtilityFunction config = { 'center_x': 44.294, @@ -63,13 +76,7 @@ acceptable=(7.125, 9.5) ) -import pandas as pd -from dgym.molecule import Molecule -from dgym.envs.designer import Designer, Generator -from dgym.envs.drug_env import DrugEnv -from dgym.agents import SequentialDrugAgent -from dgym.agents.exploration import EpsilonGreedy -from dgym.experiment import Experiment + designer = Designer( Generator(building_blocks, fingerprints), @@ -113,9 +120,6 @@ result = experiment.run(**vars(args)) # Export results -import json -import uuid -from utils import serialize_with_class_names file_path = f'{args.out_dir}/selection_noise_{uuid.uuid4()}.json' result_serialized = serialize_with_class_names(result) diff --git a/scripts/selection/max_noise/.ipynb_checkpoints/selection_temperature-checkpoint.py b/scripts/selection/max_noise/.ipynb_checkpoints/selection_temperature-checkpoint.py index a9d6544..913d508 100644 --- a/scripts/selection/max_noise/.ipynb_checkpoints/selection_temperature-checkpoint.py +++ b/scripts/selection/max_noise/.ipynb_checkpoints/selection_temperature-checkpoint.py @@ -3,6 +3,31 @@ import dgym as dg import pandas as pd +import torch +import pyarrow.parquet as pq + +import random + +import os +from dgym.envs.oracle import \ + DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction +from dgym.envs.utility import ( + ClassicUtilityFunction, MultipleUtilityFunction +) +from copy import deepcopy + +from dgym.envs.designer import Designer, Generator +from dgym.envs import DrugEnv +from dgym.agents import SequentialDrugAgent +from dgym.agents.exploration import EpsilonGreedy +from dgym.experiment import Experiment + +import json +import uuid +from utils import serialize_with_class_names + + def get_data(path): deck = dg.MoleculeCollection.load( @@ -21,8 +46,7 @@ def get_data(path): fingerprints = dg.datasets.fingerprints( f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') - import torch - import pyarrow.parquet as pq + table = pq.read_table(f'{path}/sizes.parquet')[0] sizes = torch.tensor(table.to_numpy()) @@ -31,7 +55,6 @@ def get_data(path): def get_initial_library(deck, designer): # select first molecule - import random def _select_molecule(deck): initial_index = random.randint(0, len(deck) - 1) initial_molecule = deck[initial_index] @@ -48,7 +71,7 @@ def _select_molecule(deck): def get_docking_config(path: str, target_index: int): - import os + dockstring_dir = f'{path}/dockstring_targets/' files = os.listdir(dockstring_dir) @@ -78,9 +101,7 @@ def get_docking_config(path: str, target_index: int): def get_oracles(path: str, target_index: int): - from dgym.envs.oracle import \ - DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle - from dgym.envs.utility import ClassicUtilityFunction + target, config = get_docking_config(path, target_index) @@ -101,9 +122,7 @@ def get_multiple_utility_functions( log_S_oracle, sigma=1.0 ): - from dgym.envs.utility import ( - ClassicUtilityFunction, MultipleUtilityFunction - ) + # Define utility functions pIC50_utility = ClassicUtilityFunction( @@ -124,7 +143,7 @@ def get_multiple_utility_functions( ] # Environment tolerates acceptable ADMET - from copy import deepcopy + utility_agent = MultipleUtilityFunction( utility_functions = [pIC50_utility, log_P_utility, log_S_utility], weights = [0.8, 0.1, 0.1] @@ -174,7 +193,7 @@ def get_agent_sequence(temperature: float): ) = get_data(path) # Get starting library -from dgym.envs.designer import Designer, Generator + designer = Designer( Generator(building_blocks, fingerprints, sizes), reactions, cache = True) library = get_initial_library(deck, designer) @@ -201,7 +220,7 @@ def get_agent_sequence(temperature: float): ) # Create DrugEnv -from dgym.envs import DrugEnv + drug_env = DrugEnv( designer = designer, library = library, @@ -210,8 +229,7 @@ def get_agent_sequence(temperature: float): ) # Create DrugAgent -from dgym.agents import SequentialDrugAgent -from dgym.agents.exploration import EpsilonGreedy + sequence = get_agent_sequence(temperature = args.temperature) drug_agent = SequentialDrugAgent( sequence = sequence, @@ -220,16 +238,14 @@ def get_agent_sequence(temperature: float): ) # Create and run Experiment -from dgym.experiment import Experiment + experiment = Experiment( drug_agent=drug_agent, drug_env=drug_env) file_path = f'{args.out_dir}/selection_temperature_{args.temperature}_{uuid.uuid4()}.json' result = experiment.run(**vars(args), out=file_path)[0] # Export results -import json -import uuid -from utils import serialize_with_class_names + result_serialized = serialize_with_class_names(result) json.dump(result_serialized, open(file_path, 'w')) \ No newline at end of file diff --git a/scripts/selection/max_noise/selection_max_noise.py b/scripts/selection/max_noise/selection_max_noise.py index 0f7d8ee..466434a 100644 --- a/scripts/selection/max_noise/selection_max_noise.py +++ b/scripts/selection/max_noise/selection_max_noise.py @@ -3,6 +3,28 @@ import argparse import dgym as dg +import torch +import pyarrow.parquet as pq +import random +import os + +from dgym.envs.oracle import \ + DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction +from dgym.envs.utility import ( + ClassicUtilityFunction, Policy +) +from dgym.envs.oracle import GaussianOracle +from copy import deepcopy +from dgym.envs.designer import Designer, Generator +import json +from dgym.envs import DrugEnv +from dgym.agents import SequentialDrugAgent +from dgym.agents.exploration import EpsilonGreedy +from dgym.experiment import Experiment +import uuid +from utils import serialize_with_class_names + def get_data(path): deck = dg.MoleculeCollection.load( @@ -21,8 +43,6 @@ def get_data(path): fingerprints = dg.datasets.fingerprints( f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') - import torch - import pyarrow.parquet as pq table = pq.read_table(f'{path}/sizes.parquet')[0] sizes = torch.tensor(table.to_numpy()) @@ -31,7 +51,6 @@ def get_data(path): def get_initial_library(deck, designer): # select first molecule - import random def _select_molecule(deck): initial_index = random.randint(0, len(deck) - 1) initial_molecule = deck[initial_index] @@ -48,7 +67,7 @@ def _select_molecule(deck): def get_docking_config(path: str, target_index: int): - import os + dockstring_dir = f'{path}/dockstring_targets/' files = os.listdir(dockstring_dir) @@ -78,9 +97,7 @@ def get_docking_config(path: str, target_index: int): def get_oracles(path: str, target_index: int): - from dgym.envs.oracle import \ - DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle - from dgym.envs.utility import ClassicUtilityFunction + target, config = get_docking_config(path, target_index) @@ -101,9 +118,7 @@ def get_multiple_utility_functions( log_S_oracle, sigma=1.0 ): - from dgym.envs.utility import ( - ClassicUtilityFunction, Policy - ) + # Define utility functions pIC50_utility = ClassicUtilityFunction( @@ -114,7 +129,7 @@ def get_multiple_utility_functions( log_S_oracle, ideal=(-3, 1), acceptable=(-4, 1)) # Assemble assays and surrogate models - from dgym.envs.oracle import GaussianOracle + assays = [ pIC50_oracle, log_P_oracle, @@ -125,7 +140,6 @@ def get_multiple_utility_functions( ] # Environment tolerates acceptable ADMET - from copy import deepcopy utility_agent = Policy( utility_functions = [pIC50_utility, log_P_utility, log_S_utility], weights = [0.8, 0.1, 0.1] @@ -181,7 +195,6 @@ def get_agent_sequence(): print('Loaded data.', flush=True) # Get starting library -from dgym.envs.designer import Designer, Generator designer = Designer( Generator(building_blocks, fingerprints, sizes), reactions, cache = True) library = get_initial_library(deck, designer) @@ -201,7 +214,6 @@ def get_agent_sequence(): print('Loaded oracles.', flush=True) # Load experiment state off disk if available -import json try: with open(args.experiment_state_path, 'r') as f: experiment_state = json.load(f) @@ -225,7 +237,7 @@ def get_agent_sequence(): print('Loaded utility functions.', flush=True) # Create DrugEnv -from dgym.envs import DrugEnv + drug_env = DrugEnv( designer = designer, library = library, @@ -236,8 +248,7 @@ def get_agent_sequence(): print('Loaded DrugEnv.', flush=True) # Create DrugAgent -from dgym.agents import SequentialDrugAgent -from dgym.agents.exploration import EpsilonGreedy + sequence = get_agent_sequence() drug_agent = SequentialDrugAgent( sequence = sequence, @@ -247,15 +258,13 @@ def get_agent_sequence(): print('Loaded DrugAgent.', flush=True) # Create and run Experiment -from dgym.experiment import Experiment + experiment = Experiment(drug_agent=drug_agent, drug_env=drug_env).load(experiment_state) file_path = args.experiment_state_path \ or f'{args.out_dir}/selection_max_noise_{uuid.uuid4()}.json' result = experiment.run(**vars(args), out=file_path)[0] # Export results -import uuid -from utils import serialize_with_class_names result_serialized = serialize_with_class_names(result) with open(file_path, 'w') as f: diff --git a/scripts/selection/noise/.ipynb_checkpoints/selection_noise-checkpoint.py b/scripts/selection/noise/.ipynb_checkpoints/selection_noise-checkpoint.py index b071706..6c02864 100644 --- a/scripts/selection/noise/.ipynb_checkpoints/selection_noise-checkpoint.py +++ b/scripts/selection/noise/.ipynb_checkpoints/selection_noise-checkpoint.py @@ -1,6 +1,21 @@ import argparse import dgym as dg +from dgym.envs.oracle import DockingOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction + +import pandas as pd +from dgym.molecule import Molecule +from dgym.envs.designer import Designer, Generator +from dgym.envs.drug_env import DrugEnv +from dgym.agents import SequentialDrugAgent +from dgym.agents.exploration import EpsilonGreedy +from dgym.experiment import Experiment + +import json +import uuid +from utils import serialize_with_class_names + # load all data path = '../../../../dgym-data' @@ -19,8 +34,7 @@ fingerprints = dg.datasets.fingerprints(f'{path}/out/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') # Docking oracles -from dgym.envs.oracle import DockingOracle, NoisyOracle -from dgym.envs.utility import ClassicUtilityFunction + config = { 'center_x': 44.294, @@ -63,13 +77,7 @@ acceptable=(7.125, 9.5) ) -import pandas as pd -from dgym.molecule import Molecule -from dgym.envs.designer import Designer, Generator -from dgym.envs.drug_env import DrugEnv -from dgym.agents import SequentialDrugAgent -from dgym.agents.exploration import EpsilonGreedy -from dgym.experiment import Experiment + designer = Designer( Generator(building_blocks, fingerprints), @@ -113,9 +121,7 @@ result = experiment.run(**vars(args)) # Export results -import json -import uuid -from utils import serialize_with_class_names + file_path = f'{args.out_dir}/selection_noise_{uuid.uuid4()}.json' result_serialized = serialize_with_class_names(result) diff --git a/scripts/selection/noise/.ipynb_checkpoints/selection_temperature-checkpoint.py b/scripts/selection/noise/.ipynb_checkpoints/selection_temperature-checkpoint.py index a9d6544..256eb4c 100644 --- a/scripts/selection/noise/.ipynb_checkpoints/selection_temperature-checkpoint.py +++ b/scripts/selection/noise/.ipynb_checkpoints/selection_temperature-checkpoint.py @@ -3,6 +3,36 @@ import dgym as dg import pandas as pd +import torch +import pyarrow.parquet as pq + +import random + +import os + +from dgym.envs.oracle import \ + DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction + +from dgym.envs.utility import ( + ClassicUtilityFunction, MultipleUtilityFunction +) + +from copy import deepcopy + +from dgym.envs.designer import Designer, Generator + +from dgym.envs import DrugEnv + +from dgym.agents import SequentialDrugAgent +from dgym.agents.exploration import EpsilonGreedy + +from dgym.experiment import Experiment + +import json +import uuid +from utils import serialize_with_class_names + def get_data(path): deck = dg.MoleculeCollection.load( @@ -21,8 +51,7 @@ def get_data(path): fingerprints = dg.datasets.fingerprints( f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') - import torch - import pyarrow.parquet as pq + table = pq.read_table(f'{path}/sizes.parquet')[0] sizes = torch.tensor(table.to_numpy()) @@ -31,7 +60,7 @@ def get_data(path): def get_initial_library(deck, designer): # select first molecule - import random + def _select_molecule(deck): initial_index = random.randint(0, len(deck) - 1) initial_molecule = deck[initial_index] @@ -48,7 +77,7 @@ def _select_molecule(deck): def get_docking_config(path: str, target_index: int): - import os + dockstring_dir = f'{path}/dockstring_targets/' files = os.listdir(dockstring_dir) @@ -78,9 +107,7 @@ def get_docking_config(path: str, target_index: int): def get_oracles(path: str, target_index: int): - from dgym.envs.oracle import \ - DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle - from dgym.envs.utility import ClassicUtilityFunction + target, config = get_docking_config(path, target_index) @@ -101,9 +128,7 @@ def get_multiple_utility_functions( log_S_oracle, sigma=1.0 ): - from dgym.envs.utility import ( - ClassicUtilityFunction, MultipleUtilityFunction - ) + # Define utility functions pIC50_utility = ClassicUtilityFunction( @@ -124,7 +149,7 @@ def get_multiple_utility_functions( ] # Environment tolerates acceptable ADMET - from copy import deepcopy + utility_agent = MultipleUtilityFunction( utility_functions = [pIC50_utility, log_P_utility, log_S_utility], weights = [0.8, 0.1, 0.1] @@ -174,7 +199,7 @@ def get_agent_sequence(temperature: float): ) = get_data(path) # Get starting library -from dgym.envs.designer import Designer, Generator + designer = Designer( Generator(building_blocks, fingerprints, sizes), reactions, cache = True) library = get_initial_library(deck, designer) @@ -201,7 +226,7 @@ def get_agent_sequence(temperature: float): ) # Create DrugEnv -from dgym.envs import DrugEnv + drug_env = DrugEnv( designer = designer, library = library, @@ -210,8 +235,7 @@ def get_agent_sequence(temperature: float): ) # Create DrugAgent -from dgym.agents import SequentialDrugAgent -from dgym.agents.exploration import EpsilonGreedy + sequence = get_agent_sequence(temperature = args.temperature) drug_agent = SequentialDrugAgent( sequence = sequence, @@ -220,16 +244,14 @@ def get_agent_sequence(temperature: float): ) # Create and run Experiment -from dgym.experiment import Experiment + experiment = Experiment( drug_agent=drug_agent, drug_env=drug_env) file_path = f'{args.out_dir}/selection_temperature_{args.temperature}_{uuid.uuid4()}.json' result = experiment.run(**vars(args), out=file_path)[0] # Export results -import json -import uuid -from utils import serialize_with_class_names + result_serialized = serialize_with_class_names(result) json.dump(result_serialized, open(file_path, 'w')) \ No newline at end of file diff --git a/scripts/selection/noise/selection_noise.py b/scripts/selection/noise/selection_noise.py index 88d42f7..8c95128 100644 --- a/scripts/selection/noise/selection_noise.py +++ b/scripts/selection/noise/selection_noise.py @@ -1,6 +1,28 @@ import uuid import argparse import dgym as dg +import torch +import pyarrow.parquet as pq +import random +import os + + +from dgym.envs.oracle import \ + DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction +from dgym.envs.utility import ( + ClassicUtilityFunction, Policy +) +from copy import deepcopy +from dgym.envs.designer import Designer, Generator +import json +from dgym.envs import DrugEnv +from dgym.agents import SequentialDrugAgent +from dgym.agents.exploration import EpsilonGreedy +from dgym.experiment import Experiment +import uuid +from utils import serialize_with_class_names + def get_data(path): @@ -20,8 +42,7 @@ def get_data(path): fingerprints = dg.datasets.fingerprints( f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') - import torch - import pyarrow.parquet as pq + table = pq.read_table(f'{path}/sizes.parquet')[0] sizes = torch.tensor(table.to_numpy()) @@ -30,7 +51,6 @@ def get_data(path): def get_initial_library(deck, designer): # select first molecule - import random def _select_molecule(deck): initial_index = random.randint(0, len(deck) - 1) initial_molecule = deck[initial_index] @@ -47,7 +67,6 @@ def _select_molecule(deck): def get_docking_config(path: str, target_index: int): - import os dockstring_dir = f'{path}/dockstring_targets/' files = os.listdir(dockstring_dir) @@ -77,9 +96,7 @@ def get_docking_config(path: str, target_index: int): def get_oracles(path: str, target_index: int): - from dgym.envs.oracle import \ - DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle - from dgym.envs.utility import ClassicUtilityFunction + target, config = get_docking_config(path, target_index) @@ -100,9 +117,7 @@ def get_multiple_utility_functions( log_S_oracle, sigma=1.0 ): - from dgym.envs.utility import ( - ClassicUtilityFunction, Policy - ) + # Define utility functions pIC50_utility = ClassicUtilityFunction( @@ -123,7 +138,7 @@ def get_multiple_utility_functions( ] # Environment tolerates acceptable ADMET - from copy import deepcopy + utility_agent = Policy( utility_functions = [pIC50_utility, log_P_utility, log_S_utility], weights = [0.8, 0.1, 0.1] @@ -179,7 +194,7 @@ def get_agent_sequence(): print('Loaded data.', flush=True) # Get starting library -from dgym.envs.designer import Designer, Generator + designer = Designer( Generator(building_blocks, fingerprints, sizes), reactions, cache = True) library = get_initial_library(deck, designer) @@ -199,7 +214,7 @@ def get_agent_sequence(): print('Loaded oracles.', flush=True) # Load experiment state off disk if available -import json + try: with open(args.experiment_state_path, 'r') as f: experiment_state = json.load(f) @@ -224,7 +239,7 @@ def get_agent_sequence(): print('Loaded utility functions.', flush=True) # Create DrugEnv -from dgym.envs import DrugEnv + drug_env = DrugEnv( designer = designer, library = library, @@ -235,8 +250,7 @@ def get_agent_sequence(): print('Loaded DrugEnv.', flush=True) # Create DrugAgent -from dgym.agents import SequentialDrugAgent -from dgym.agents.exploration import EpsilonGreedy + sequence = get_agent_sequence() drug_agent = SequentialDrugAgent( sequence = sequence, @@ -246,15 +260,13 @@ def get_agent_sequence(): print('Loaded DrugAgent.', flush=True) # Create and run Experiment -from dgym.experiment import Experiment + experiment = Experiment(drug_agent=drug_agent, drug_env=drug_env).load(experiment_state) file_path = args.experiment_state_path \ or f'{args.out_dir}/selection_noise_{args.sigma}_{uuid.uuid4()}.json' result = experiment.run(**vars(args), out=file_path)[0] # Export results -import uuid -from utils import serialize_with_class_names result_serialized = serialize_with_class_names(result) with open(file_path, 'w') as f: diff --git a/scripts/selection/score_ratio/.ipynb_checkpoints/selection_noise-checkpoint.py b/scripts/selection/score_ratio/.ipynb_checkpoints/selection_noise-checkpoint.py index b071706..09a253c 100644 --- a/scripts/selection/score_ratio/.ipynb_checkpoints/selection_noise-checkpoint.py +++ b/scripts/selection/score_ratio/.ipynb_checkpoints/selection_noise-checkpoint.py @@ -1,6 +1,21 @@ import argparse import dgym as dg +from dgym.envs.oracle import DockingOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction + +import pandas as pd +from dgym.molecule import Molecule +from dgym.envs.designer import Designer, Generator +from dgym.envs.drug_env import DrugEnv +from dgym.agents import SequentialDrugAgent +from dgym.agents.exploration import EpsilonGreedy +from dgym.experiment import Experiment + +import json +import uuid +from utils import serialize_with_class_names + # load all data path = '../../../../dgym-data' @@ -19,8 +34,6 @@ fingerprints = dg.datasets.fingerprints(f'{path}/out/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') # Docking oracles -from dgym.envs.oracle import DockingOracle, NoisyOracle -from dgym.envs.utility import ClassicUtilityFunction config = { 'center_x': 44.294, @@ -63,13 +76,6 @@ acceptable=(7.125, 9.5) ) -import pandas as pd -from dgym.molecule import Molecule -from dgym.envs.designer import Designer, Generator -from dgym.envs.drug_env import DrugEnv -from dgym.agents import SequentialDrugAgent -from dgym.agents.exploration import EpsilonGreedy -from dgym.experiment import Experiment designer = Designer( Generator(building_blocks, fingerprints), @@ -113,9 +119,7 @@ result = experiment.run(**vars(args)) # Export results -import json -import uuid -from utils import serialize_with_class_names + file_path = f'{args.out_dir}/selection_noise_{uuid.uuid4()}.json' result_serialized = serialize_with_class_names(result) diff --git a/scripts/selection/score_ratio/.ipynb_checkpoints/selection_temperature-checkpoint.py b/scripts/selection/score_ratio/.ipynb_checkpoints/selection_temperature-checkpoint.py index a9d6544..6bd4932 100644 --- a/scripts/selection/score_ratio/.ipynb_checkpoints/selection_temperature-checkpoint.py +++ b/scripts/selection/score_ratio/.ipynb_checkpoints/selection_temperature-checkpoint.py @@ -3,6 +3,36 @@ import dgym as dg import pandas as pd +import torch +import pyarrow.parquet as pq + +import random + +import os + +from dgym.envs.oracle import \ + DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction + +from dgym.envs.utility import ( + ClassicUtilityFunction, MultipleUtilityFunction +) + +from copy import deepcopy + +from dgym.envs.designer import Designer, Generator + +from dgym.envs import DrugEnv + +from dgym.agents import SequentialDrugAgent +from dgym.agents.exploration import EpsilonGreedy + +from dgym.experiment import Experiment + +import json +import uuid +from utils import serialize_with_class_names + def get_data(path): deck = dg.MoleculeCollection.load( @@ -21,8 +51,6 @@ def get_data(path): fingerprints = dg.datasets.fingerprints( f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') - import torch - import pyarrow.parquet as pq table = pq.read_table(f'{path}/sizes.parquet')[0] sizes = torch.tensor(table.to_numpy()) @@ -31,7 +59,6 @@ def get_data(path): def get_initial_library(deck, designer): # select first molecule - import random def _select_molecule(deck): initial_index = random.randint(0, len(deck) - 1) initial_molecule = deck[initial_index] @@ -48,7 +75,6 @@ def _select_molecule(deck): def get_docking_config(path: str, target_index: int): - import os dockstring_dir = f'{path}/dockstring_targets/' files = os.listdir(dockstring_dir) @@ -78,9 +104,6 @@ def get_docking_config(path: str, target_index: int): def get_oracles(path: str, target_index: int): - from dgym.envs.oracle import \ - DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle - from dgym.envs.utility import ClassicUtilityFunction target, config = get_docking_config(path, target_index) @@ -101,9 +124,7 @@ def get_multiple_utility_functions( log_S_oracle, sigma=1.0 ): - from dgym.envs.utility import ( - ClassicUtilityFunction, MultipleUtilityFunction - ) + # Define utility functions pIC50_utility = ClassicUtilityFunction( @@ -124,7 +145,7 @@ def get_multiple_utility_functions( ] # Environment tolerates acceptable ADMET - from copy import deepcopy + utility_agent = MultipleUtilityFunction( utility_functions = [pIC50_utility, log_P_utility, log_S_utility], weights = [0.8, 0.1, 0.1] @@ -174,7 +195,7 @@ def get_agent_sequence(temperature: float): ) = get_data(path) # Get starting library -from dgym.envs.designer import Designer, Generator + designer = Designer( Generator(building_blocks, fingerprints, sizes), reactions, cache = True) library = get_initial_library(deck, designer) @@ -201,7 +222,7 @@ def get_agent_sequence(temperature: float): ) # Create DrugEnv -from dgym.envs import DrugEnv + drug_env = DrugEnv( designer = designer, library = library, @@ -210,8 +231,7 @@ def get_agent_sequence(temperature: float): ) # Create DrugAgent -from dgym.agents import SequentialDrugAgent -from dgym.agents.exploration import EpsilonGreedy + sequence = get_agent_sequence(temperature = args.temperature) drug_agent = SequentialDrugAgent( sequence = sequence, @@ -220,16 +240,14 @@ def get_agent_sequence(temperature: float): ) # Create and run Experiment -from dgym.experiment import Experiment + experiment = Experiment( drug_agent=drug_agent, drug_env=drug_env) file_path = f'{args.out_dir}/selection_temperature_{args.temperature}_{uuid.uuid4()}.json' result = experiment.run(**vars(args), out=file_path)[0] # Export results -import json -import uuid -from utils import serialize_with_class_names + result_serialized = serialize_with_class_names(result) json.dump(result_serialized, open(file_path, 'w')) \ No newline at end of file diff --git a/scripts/selection/score_ratio/selection_score_ratio.py b/scripts/selection/score_ratio/selection_score_ratio.py index cdc5279..9f4aabd 100644 --- a/scripts/selection/score_ratio/selection_score_ratio.py +++ b/scripts/selection/score_ratio/selection_score_ratio.py @@ -2,6 +2,38 @@ import argparse import dgym as dg +import torch +import pyarrow.parquet as pq + +import random + +import os + +from dgym.envs.oracle import \ + DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction + +from dgym.envs.utility import ( + ClassicUtilityFunction, MultipleUtilityFunction +) + +from copy import deepcopy + +from dgym.envs.designer import Designer, Generator + +import json + +from dgym.envs import DrugEnv + +from dgym.agents import SequentialDrugAgent +from dgym.agents.exploration import EpsilonGreedy + +from dgym.experiment import Experiment + +import json +import uuid +from utils import serialize_with_class_names + def get_data(path): deck = dg.MoleculeCollection.load( @@ -20,8 +52,6 @@ def get_data(path): fingerprints = dg.datasets.fingerprints( f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') - import torch - import pyarrow.parquet as pq table = pq.read_table(f'{path}/sizes.parquet')[0] sizes = torch.tensor(table.to_numpy()) @@ -30,7 +60,6 @@ def get_data(path): def get_initial_library(deck, designer): # select first molecule - import random def _select_molecule(deck): initial_index = random.randint(0, len(deck) - 1) initial_molecule = deck[initial_index] @@ -47,7 +76,6 @@ def _select_molecule(deck): def get_docking_config(path: str, target_index: int): - import os dockstring_dir = f'{path}/dockstring_targets/' files = os.listdir(dockstring_dir) @@ -77,9 +105,6 @@ def get_docking_config(path: str, target_index: int): def get_oracles(path: str, target_index: int): - from dgym.envs.oracle import \ - DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle - from dgym.envs.utility import ClassicUtilityFunction target, config = get_docking_config(path, target_index) @@ -100,9 +125,6 @@ def get_multiple_utility_functions( log_S_oracle, sigma=1.0 ): - from dgym.envs.utility import ( - ClassicUtilityFunction, MultipleUtilityFunction - ) # Define utility functions pIC50_utility = ClassicUtilityFunction( @@ -123,7 +145,6 @@ def get_multiple_utility_functions( ] # Environment tolerates acceptable ADMET - from copy import deepcopy utility_agent = MultipleUtilityFunction( utility_functions = [pIC50_utility, log_P_utility, log_S_utility], weights = [0.8, 0.1, 0.1] @@ -187,7 +208,6 @@ def get_agent_sequence(batch_size: int = 24, score_ratio: int = 5): print('Loaded data.', flush=True) # Get starting library -from dgym.envs.designer import Designer, Generator designer = Designer( Generator(building_blocks, fingerprints, sizes), reactions, cache = True) library = get_initial_library(deck, designer) @@ -207,7 +227,6 @@ def get_agent_sequence(batch_size: int = 24, score_ratio: int = 5): print('Loaded oracles.', flush=True) # Load experiment state off disk if available -import json try: with open(args.experiment_state_path, 'r') as f: experiment_state = json.load(f) @@ -231,7 +250,6 @@ def get_agent_sequence(batch_size: int = 24, score_ratio: int = 5): print('Loaded utility functions.', flush=True) # Create DrugEnv -from dgym.envs import DrugEnv drug_env = DrugEnv( designer = designer, library = library, @@ -242,8 +260,6 @@ def get_agent_sequence(batch_size: int = 24, score_ratio: int = 5): print('Loaded DrugEnv.', flush=True) # Create DrugAgent -from dgym.agents import SequentialDrugAgent -from dgym.agents.exploration import EpsilonGreedy sequence = get_agent_sequence(batch_size=args.batch_size, score_ratio=args.score_ratio) drug_agent = SequentialDrugAgent( sequence = sequence, @@ -253,16 +269,12 @@ def get_agent_sequence(batch_size: int = 24, score_ratio: int = 5): print('Loaded DrugAgent.', flush=True) # Create and run Experiment -from dgym.experiment import Experiment experiment = Experiment(drug_agent=drug_agent, drug_env=drug_env).load(experiment_state) file_path = args.experiment_state_path \ or f'{args.out_dir}/selection_batch_size_{args.batch_size}_score_ratio_{args.score_ratio}_{uuid.uuid4()}.json' result = experiment.run(**vars(args), out=file_path)[0] # Export results -import json -import uuid -from utils import serialize_with_class_names result_serialized = serialize_with_class_names(result) with open(file_path, 'w') as f: diff --git a/scripts/selection/targets/.ipynb_checkpoints/selection_noise-checkpoint.py b/scripts/selection/targets/.ipynb_checkpoints/selection_noise-checkpoint.py index b071706..6c02864 100644 --- a/scripts/selection/targets/.ipynb_checkpoints/selection_noise-checkpoint.py +++ b/scripts/selection/targets/.ipynb_checkpoints/selection_noise-checkpoint.py @@ -1,6 +1,21 @@ import argparse import dgym as dg +from dgym.envs.oracle import DockingOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction + +import pandas as pd +from dgym.molecule import Molecule +from dgym.envs.designer import Designer, Generator +from dgym.envs.drug_env import DrugEnv +from dgym.agents import SequentialDrugAgent +from dgym.agents.exploration import EpsilonGreedy +from dgym.experiment import Experiment + +import json +import uuid +from utils import serialize_with_class_names + # load all data path = '../../../../dgym-data' @@ -19,8 +34,7 @@ fingerprints = dg.datasets.fingerprints(f'{path}/out/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') # Docking oracles -from dgym.envs.oracle import DockingOracle, NoisyOracle -from dgym.envs.utility import ClassicUtilityFunction + config = { 'center_x': 44.294, @@ -63,13 +77,7 @@ acceptable=(7.125, 9.5) ) -import pandas as pd -from dgym.molecule import Molecule -from dgym.envs.designer import Designer, Generator -from dgym.envs.drug_env import DrugEnv -from dgym.agents import SequentialDrugAgent -from dgym.agents.exploration import EpsilonGreedy -from dgym.experiment import Experiment + designer = Designer( Generator(building_blocks, fingerprints), @@ -113,9 +121,7 @@ result = experiment.run(**vars(args)) # Export results -import json -import uuid -from utils import serialize_with_class_names + file_path = f'{args.out_dir}/selection_noise_{uuid.uuid4()}.json' result_serialized = serialize_with_class_names(result) diff --git a/scripts/selection/targets/selection_targets.py b/scripts/selection/targets/selection_targets.py index b4b7d1e..d203368 100644 --- a/scripts/selection/targets/selection_targets.py +++ b/scripts/selection/targets/selection_targets.py @@ -1,5 +1,32 @@ import argparse import dgym as dg +import torch +import pyarrow.parquet as pq + +import os + +from dgym.envs.oracle import DockingOracle, RDKitOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction + +import pandas as pd +from dgym.molecule import Molecule +from dgym.envs.designer import Designer, Generator +from dgym.envs.drug_env import DrugEnv + +import random + +from dgym.experiment import Experiment +from dgym.agents import SequentialDrugAgent +from dgym.agents.exploration import EpsilonGreedy + +import pandas as pd +from dgym.molecule import Molecule +from dgym.envs.designer import Designer, Generator +from dgym.envs.drug_env import DrugEnv + +import json +import uuid +from utils import serialize_with_class_names def get_data(path): @@ -19,8 +46,7 @@ def get_data(path): fingerprints = dg.datasets.fingerprints( f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') - import torch - import pyarrow.parquet as pq + table = pq.read_table(f'{path}/sizes.parquet')[0] sizes = torch.tensor(table.to_numpy()) @@ -29,7 +55,7 @@ def get_data(path): def get_docking_config(path, target_index): - import os + dockstring_dir = f'{path}/dockstring_targets/' files = os.listdir(dockstring_dir) @@ -62,8 +88,7 @@ def get_docking_config(path, target_index): def get_utility(path, target_index): # Docking oracles - from dgym.envs.oracle import DockingOracle, RDKitOracle, NoisyOracle - from dgym.envs.utility import ClassicUtilityFunction + # Create noiseless evaluators name, receptor_path, config = get_docking_config(path, target_index) @@ -98,10 +123,7 @@ def get_drug_env( composite_utility ): - import pandas as pd - from dgym.molecule import Molecule - from dgym.envs.designer import Designer, Generator - from dgym.envs.drug_env import DrugEnv + designer = Designer( Generator(building_blocks, fingerprints, sizes), @@ -110,7 +132,7 @@ def get_drug_env( ) # select first molecule - import random + def select_molecule(deck): initial_index = random.randint(0, len(deck) - 1) initial_molecule = deck[initial_index] @@ -142,14 +164,7 @@ def select_molecule(deck): args = parser.parse_args() # Run experiment -from dgym.experiment import Experiment -from dgym.agents import SequentialDrugAgent -from dgym.agents.exploration import EpsilonGreedy -import pandas as pd -from dgym.molecule import Molecule -from dgym.envs.designer import Designer, Generator -from dgym.envs.drug_env import DrugEnv # Load all data path = '../../../../dgym-data' @@ -198,9 +213,7 @@ def select_molecule(deck): result = experiment.run(**vars(args), out=file_path) # Export results -import json -import uuid -from utils import serialize_with_class_names + result_serialized = serialize_with_class_names(result) json.dump(result_serialized, open(file_path, 'w')) \ No newline at end of file diff --git a/scripts/selection/temperature/.ipynb_checkpoints/selection_noise-checkpoint.py b/scripts/selection/temperature/.ipynb_checkpoints/selection_noise-checkpoint.py index b071706..980676f 100644 --- a/scripts/selection/temperature/.ipynb_checkpoints/selection_noise-checkpoint.py +++ b/scripts/selection/temperature/.ipynb_checkpoints/selection_noise-checkpoint.py @@ -1,6 +1,21 @@ import argparse import dgym as dg +from dgym.envs.oracle import DockingOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction + +import pandas as pd +from dgym.molecule import Molecule +from dgym.envs.designer import Designer, Generator +from dgym.envs.drug_env import DrugEnv +from dgym.agents import SequentialDrugAgent +from dgym.agents.exploration import EpsilonGreedy +from dgym.experiment import Experiment + +import json +import uuid +from utils import serialize_with_class_names + # load all data path = '../../../../dgym-data' @@ -19,8 +34,7 @@ fingerprints = dg.datasets.fingerprints(f'{path}/out/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') # Docking oracles -from dgym.envs.oracle import DockingOracle, NoisyOracle -from dgym.envs.utility import ClassicUtilityFunction + config = { 'center_x': 44.294, @@ -63,13 +77,7 @@ acceptable=(7.125, 9.5) ) -import pandas as pd -from dgym.molecule import Molecule -from dgym.envs.designer import Designer, Generator -from dgym.envs.drug_env import DrugEnv -from dgym.agents import SequentialDrugAgent -from dgym.agents.exploration import EpsilonGreedy -from dgym.experiment import Experiment + designer = Designer( Generator(building_blocks, fingerprints), @@ -113,9 +121,6 @@ result = experiment.run(**vars(args)) # Export results -import json -import uuid -from utils import serialize_with_class_names file_path = f'{args.out_dir}/selection_noise_{uuid.uuid4()}.json' result_serialized = serialize_with_class_names(result) diff --git a/scripts/selection/temperature/.ipynb_checkpoints/selection_temperature-checkpoint.py b/scripts/selection/temperature/.ipynb_checkpoints/selection_temperature-checkpoint.py index a9d6544..256eb4c 100644 --- a/scripts/selection/temperature/.ipynb_checkpoints/selection_temperature-checkpoint.py +++ b/scripts/selection/temperature/.ipynb_checkpoints/selection_temperature-checkpoint.py @@ -3,6 +3,36 @@ import dgym as dg import pandas as pd +import torch +import pyarrow.parquet as pq + +import random + +import os + +from dgym.envs.oracle import \ + DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction + +from dgym.envs.utility import ( + ClassicUtilityFunction, MultipleUtilityFunction +) + +from copy import deepcopy + +from dgym.envs.designer import Designer, Generator + +from dgym.envs import DrugEnv + +from dgym.agents import SequentialDrugAgent +from dgym.agents.exploration import EpsilonGreedy + +from dgym.experiment import Experiment + +import json +import uuid +from utils import serialize_with_class_names + def get_data(path): deck = dg.MoleculeCollection.load( @@ -21,8 +51,7 @@ def get_data(path): fingerprints = dg.datasets.fingerprints( f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') - import torch - import pyarrow.parquet as pq + table = pq.read_table(f'{path}/sizes.parquet')[0] sizes = torch.tensor(table.to_numpy()) @@ -31,7 +60,7 @@ def get_data(path): def get_initial_library(deck, designer): # select first molecule - import random + def _select_molecule(deck): initial_index = random.randint(0, len(deck) - 1) initial_molecule = deck[initial_index] @@ -48,7 +77,7 @@ def _select_molecule(deck): def get_docking_config(path: str, target_index: int): - import os + dockstring_dir = f'{path}/dockstring_targets/' files = os.listdir(dockstring_dir) @@ -78,9 +107,7 @@ def get_docking_config(path: str, target_index: int): def get_oracles(path: str, target_index: int): - from dgym.envs.oracle import \ - DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle - from dgym.envs.utility import ClassicUtilityFunction + target, config = get_docking_config(path, target_index) @@ -101,9 +128,7 @@ def get_multiple_utility_functions( log_S_oracle, sigma=1.0 ): - from dgym.envs.utility import ( - ClassicUtilityFunction, MultipleUtilityFunction - ) + # Define utility functions pIC50_utility = ClassicUtilityFunction( @@ -124,7 +149,7 @@ def get_multiple_utility_functions( ] # Environment tolerates acceptable ADMET - from copy import deepcopy + utility_agent = MultipleUtilityFunction( utility_functions = [pIC50_utility, log_P_utility, log_S_utility], weights = [0.8, 0.1, 0.1] @@ -174,7 +199,7 @@ def get_agent_sequence(temperature: float): ) = get_data(path) # Get starting library -from dgym.envs.designer import Designer, Generator + designer = Designer( Generator(building_blocks, fingerprints, sizes), reactions, cache = True) library = get_initial_library(deck, designer) @@ -201,7 +226,7 @@ def get_agent_sequence(temperature: float): ) # Create DrugEnv -from dgym.envs import DrugEnv + drug_env = DrugEnv( designer = designer, library = library, @@ -210,8 +235,7 @@ def get_agent_sequence(temperature: float): ) # Create DrugAgent -from dgym.agents import SequentialDrugAgent -from dgym.agents.exploration import EpsilonGreedy + sequence = get_agent_sequence(temperature = args.temperature) drug_agent = SequentialDrugAgent( sequence = sequence, @@ -220,16 +244,14 @@ def get_agent_sequence(temperature: float): ) # Create and run Experiment -from dgym.experiment import Experiment + experiment = Experiment( drug_agent=drug_agent, drug_env=drug_env) file_path = f'{args.out_dir}/selection_temperature_{args.temperature}_{uuid.uuid4()}.json' result = experiment.run(**vars(args), out=file_path)[0] # Export results -import json -import uuid -from utils import serialize_with_class_names + result_serialized = serialize_with_class_names(result) json.dump(result_serialized, open(file_path, 'w')) \ No newline at end of file diff --git a/scripts/selection/temperature/selection_temperature.py b/scripts/selection/temperature/selection_temperature.py index 3ed03d4..4a4c144 100644 --- a/scripts/selection/temperature/selection_temperature.py +++ b/scripts/selection/temperature/selection_temperature.py @@ -3,6 +3,34 @@ import dgym as dg import pandas as pd +import torch +import pyarrow.parquet as pq + +import random + +import os + +from dgym.envs.oracle import \ + DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle +from dgym.envs.utility import ClassicUtilityFunction +from dgym.envs.utility import ( + ClassicUtilityFunction, MultipleUtilityFunction +) + +from copy import deepcopy + +from dgym.envs.designer import Designer, Generator +from dgym.envs import DrugEnv + +from dgym.agents import SequentialDrugAgent +from dgym.agents.exploration import EpsilonGreedy + +from dgym.experiment import Experiment + +import json +import uuid +from utils import serialize_with_class_names + def get_data(path): deck = dg.MoleculeCollection.load( @@ -21,8 +49,7 @@ def get_data(path): fingerprints = dg.datasets.fingerprints( f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb') - import torch - import pyarrow.parquet as pq + table = pq.read_table(f'{path}/sizes.parquet')[0] sizes = torch.tensor(table.to_numpy()) @@ -31,7 +58,7 @@ def get_data(path): def get_initial_library(deck, designer): # select first molecule - import random + def _select_molecule(deck): initial_index = random.randint(0, len(deck) - 1) initial_molecule = deck[initial_index] @@ -48,7 +75,7 @@ def _select_molecule(deck): def get_docking_config(path: str, target_index: int): - import os + dockstring_dir = f'{path}/dockstring_targets/' files = os.listdir(dockstring_dir) @@ -78,9 +105,7 @@ def get_docking_config(path: str, target_index: int): def get_oracles(path: str, target_index: int): - from dgym.envs.oracle import \ - DockingOracle, CatBoostOracle, RDKitOracle, NoisyOracle - from dgym.envs.utility import ClassicUtilityFunction + target, config = get_docking_config(path, target_index) @@ -101,9 +126,7 @@ def get_multiple_utility_functions( log_S_oracle, sigma=1.0 ): - from dgym.envs.utility import ( - ClassicUtilityFunction, MultipleUtilityFunction - ) + # Define utility functions pIC50_utility = ClassicUtilityFunction( @@ -124,7 +147,7 @@ def get_multiple_utility_functions( ] # Environment tolerates acceptable ADMET - from copy import deepcopy + utility_agent = MultipleUtilityFunction( utility_functions = [pIC50_utility, log_P_utility, log_S_utility], weights = [0.8, 0.1, 0.1] @@ -210,7 +233,7 @@ def get_agent_sequence(temperature_index: int): print('Loaded data.', flush=True) # Get starting library -from dgym.envs.designer import Designer, Generator + designer = Designer( Generator(building_blocks, fingerprints, sizes), reactions, cache = True) library = get_initial_library(deck, designer) @@ -243,7 +266,7 @@ def get_agent_sequence(temperature_index: int): print('Loaded utility functions.', flush=True) # Create DrugEnv -from dgym.envs import DrugEnv + drug_env = DrugEnv( designer = designer, library = library, @@ -254,8 +277,7 @@ def get_agent_sequence(temperature_index: int): print('Loaded DrugEnv.', flush=True) # Create DrugAgent -from dgym.agents import SequentialDrugAgent -from dgym.agents.exploration import EpsilonGreedy + sequence = get_agent_sequence(temperature_index = args.temperature_index) drug_agent = SequentialDrugAgent( sequence = sequence, @@ -265,16 +287,14 @@ def get_agent_sequence(temperature_index: int): print('Loaded DrugAgent.', flush=True) # Create and run Experiment -from dgym.experiment import Experiment + experiment = Experiment( drug_agent=drug_agent, drug_env=drug_env) file_path = f'{args.out_dir}/selection_temperature_{args.temperature_index}_{uuid.uuid4()}.json' result = experiment.run(**vars(args), out=file_path)[0] # Export results -import json -import uuid -from utils import serialize_with_class_names + result_serialized = serialize_with_class_names(result) with open(file_path, 'w') as f: