Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor: Move All Imports to the Top of Files #2

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions dgym/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@
# =============================================================================
# IMPORTS
# =============================================================================
import copy
import random
import dgym as dg
import torch
import random
import itertools
import numpy as np
import pandas as pd
from rdkit import Chem
from functools import partial
from dgym.molecule import Molecule
from dgym.reaction import LazyReaction
from typing import Union, Iterable, Optional, List, Any, Callable
Expand Down Expand Up @@ -194,7 +197,6 @@ def __setitem__(self, key: Any, value: Any):

def shuffle(self, seed=None):
""" Shuffle the collection and return it. """
import random
if seed is not None:
random.seed(seed)
random.shuffle(self._items)
Expand Down Expand Up @@ -334,7 +336,6 @@ def unique(self):

def copy(self, unique=False):
""" Return a copy of self. """
import copy
return self.__class__(copy.deepcopy(self._items))

def view(
Expand All @@ -357,8 +358,6 @@ def view(
torch.utils.data.DataLoader
Resulting data loader.
"""
from functools import partial

if collate_fn is None:
# provide default collate function
collate_fn = self._batch
Expand Down Expand Up @@ -524,7 +523,6 @@ def from_json(
From SmilesClickChem: https://zenodo.org/record/4100676

"""
import pandas as pd
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(This was duplicated)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There may be other imports that we can remove, but that is outside the scope of this change.
The priority should be to set up the test bed and reduce repetition first, and only then hunt these down.


# load from JSON
reactions_df = pd.read_json(path).T.reset_index(drop=True)
Expand Down
2 changes: 1 addition & 1 deletion dgym/datasets.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import chemfp
import pandas as pd
from rdkit import Chem
from typing import Optional
from rdkit.Chem import AllChem
from rdkit.Chem import PandasTools
from dgym.reaction import Reaction

import chemfp
chemfp.set_license('20241121-columbia.edu@DAAAPLPPDDKGPECJIJJGFNBEPIIKHOOMFAOG')

def fingerprints(path):
Expand Down
20 changes: 9 additions & 11 deletions dgym/envs/oracle.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@
from sklearn.preprocessing import normalize
from scikit_mol.descriptors import MolecularDescriptorTransformer

import re
import glob
from itertools import islice

import subprocess
import os
import shutil
import tempfile

class OracleCache(dict):
def __missing__(self, key):
Expand Down Expand Up @@ -418,10 +426,6 @@ def _convert_units(


def _gather_results(self, directory: str):

import re
import glob
from itertools import islice

scores = []
smiles = []
Expand Down Expand Up @@ -451,7 +455,6 @@ def _gather_results(self, directory: str):
return smiles, scores

def _dock(self, command: str):
import subprocess
return subprocess.run(
command,
shell=True,
Expand Down Expand Up @@ -480,10 +483,7 @@ def _prepare_command(self, config, directory: str):

return ' '.join(['unidock', *inputs])

def _prepare_ligands(self, molecules, directory: str):

import os

def _prepare_ligands(self, molecules, directory: str):
failed = []
paths = []
for idx, mol in enumerate(molecules):
Expand Down Expand Up @@ -548,8 +548,6 @@ def _compute_deltaG(self, energies, temperature=298.15):

@contextmanager
def _managed_directory(self, dir_path=None):
import shutil
import tempfile
is_temp_dir = False
if dir_path is None:
dir_path = tempfile.mkdtemp()
Expand Down
6 changes: 2 additions & 4 deletions dgym/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from rdkit import Chem
from rdkit.Chem import rdFMCS, AllChem, Draw

import chemfp
chemfp.set_license('20231114-columbia.edu@DAAABLGMDNEEHFALIFOLIONPFHFDJDOLHABF')
__all__ = [
'sort_fingerprints',
'partition_building_blocks',
Expand All @@ -20,10 +22,6 @@
'align_lineage'
]

import chemfp
chemfp.set_license('20231114-columbia.edu@DAAABLGMDNEEHFALIFOLIONPFHFDJDOLHABF')


# Building block processing.
# -----------------------------------------------
def compute_fingerprints(sdf_path: str = './', out_path: str = './out'):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@
import random

import dgym as dg
import torch
import pyarrow.parquet as pq
# Docking oracles
from dgym.envs.oracle import DockingOracle, NoisyOracle
from dgym.envs.utility import ClassicUtilityFunction
from dgym.envs.designer import Designer, Generator
import uuid

def get_data(path):

Expand All @@ -25,19 +32,12 @@ def get_data(path):
building_blocks = dg.datasets.disk_loader(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630.sdf')
fingerprints = dg.datasets.fingerprints(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb')

import torch
import pyarrow.parquet as pq
table = pq.read_table(f'{path}/sizes.parquet')[0]
sizes = torch.tensor(table.to_numpy())

return deck, reactions, building_blocks, fingerprints, sizes

def get_oracles(path, sigma=0.1):

# Docking oracles
from dgym.envs.oracle import DockingOracle, NoisyOracle
from dgym.envs.utility import ClassicUtilityFunction

config = {
'center_x': 44.294,
'center_y': 28.123,
Expand Down Expand Up @@ -116,7 +116,6 @@ def get_molecules(deck, batch_size):
sizes
) = get_data(path)

from dgym.envs.designer import Designer, Generator

designer = Designer(
Generator(building_blocks, fingerprints, sizes),
Expand Down Expand Up @@ -147,7 +146,6 @@ def get_molecules(deck, batch_size):
batch_size = 300

# Check if file already exists
import uuid

file_path = f'./out/adam17_random_batch_{uuid.uuid4()}.tsv'
for _ in range(10):
Expand Down
15 changes: 8 additions & 7 deletions scripts/screen/adam17/adam17_screen.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@
import random

import dgym as dg
import torch
import pyarrow.parquet as pq

from dgym.envs.oracle import DockingOracle, NoisyOracle
from dgym.envs.utility import ClassicUtilityFunction

from dgym.envs.designer import Designer, Generator
import uuid

def get_data(path):

Expand All @@ -25,8 +33,6 @@ def get_data(path):
building_blocks = dg.datasets.disk_loader(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630.sdf')
fingerprints = dg.datasets.fingerprints(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb')

import torch
import pyarrow.parquet as pq
table = pq.read_table(f'{path}/sizes.parquet')[0]
sizes = torch.tensor(table.to_numpy())

Expand All @@ -35,9 +41,6 @@ def get_data(path):
def get_oracles(path, sigma=0.1):

# Docking oracles
from dgym.envs.oracle import DockingOracle, NoisyOracle
from dgym.envs.utility import ClassicUtilityFunction

config = {
'center_x': 44.294,
'center_y': 28.123,
Expand Down Expand Up @@ -116,7 +119,6 @@ def get_molecules(deck, batch_size):
sizes
) = get_data(path)

from dgym.envs.designer import Designer, Generator

designer = Designer(
Generator(building_blocks, fingerprints, sizes),
Expand Down Expand Up @@ -147,7 +149,6 @@ def get_molecules(deck, batch_size):
batch_size = 300

# Check if file already exists
import uuid

file_path = f'./out/adam17_random_batch_{uuid.uuid4()}.tsv'
for _ in range(10):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@
import random

import dgym as dg
import torch
import pyarrow.parquet as pq
# Docking oracles
from dgym.envs.oracle import DockingOracle, NoisyOracle
from dgym.envs.utility import ClassicUtilityFunction
from dgym.envs.designer import Designer, Generator
import uuid

def get_data(path):

Expand All @@ -25,19 +32,13 @@ def get_data(path):
building_blocks = dg.datasets.disk_loader(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630.sdf')
fingerprints = dg.datasets.fingerprints(f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb')

import torch
import pyarrow.parquet as pq
table = pq.read_table(f'{path}/sizes.parquet')[0]
sizes = torch.tensor(table.to_numpy())

return deck, reactions, building_blocks, fingerprints, sizes

def get_oracles(path, sigma=0.1):

# Docking oracles
from dgym.envs.oracle import DockingOracle, NoisyOracle
from dgym.envs.utility import ClassicUtilityFunction

config = {
'center_x': 44.294,
'center_y': 28.123,
Expand Down Expand Up @@ -116,8 +117,6 @@ def get_molecules(deck, batch_size):
sizes
) = get_data(path)

from dgym.envs.designer import Designer, Generator

designer = Designer(
Generator(building_blocks, fingerprints, sizes),
reactions,
Expand Down Expand Up @@ -147,7 +146,6 @@ def get_molecules(deck, batch_size):
batch_size = 300

# Check if file already exists
import uuid

file_path = f'./out/adam17_random_batch_{uuid.uuid4()}.tsv'
for _ in range(10):
Expand Down
26 changes: 13 additions & 13 deletions scripts/screen/dockstring/dockstring_screen.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
import argparse
import dgym as dg
import torch
import pyarrow.parquet as pq
from dgym.envs.designer import Designer, Generator
# select first molecule
import random
import os
from dgym.envs.oracle import DockingOracle
import pandas as pd
from dgym.molecule import Molecule
from dgym.envs.designer import Designer, Generator
import os


def get_data(path):

Expand All @@ -19,8 +31,6 @@ def get_data(path):
fingerprints = dg.datasets.fingerprints(
f'{path}/Enamine_Building_Blocks_Stock_262336cmpd_20230630_atoms.fpb')

import torch
import pyarrow.parquet as pq
table = pq.read_table(f'{path}/sizes.parquet')[0]
sizes = torch.tensor(table.to_numpy())

Expand All @@ -34,17 +44,13 @@ def get_molecules(
fingerprints,
sizes,
):

from dgym.envs.designer import Designer, Generator


designer = Designer(
Generator(building_blocks, fingerprints, sizes),
reactions,
cache = True
)

# select first molecule
import random

def select_molecule(deck):
initial_index = random.randint(0, len(deck))
Expand Down Expand Up @@ -75,7 +81,6 @@ def select_molecule(deck):

def get_docking_config(path, target_index, scorer):

import os

dockstring_dir = f'{path}/dockstring_targets/'
files = os.listdir(dockstring_dir)
Expand Down Expand Up @@ -107,7 +112,6 @@ def get_docking_config(path, target_index, scorer):

def get_oracle(path: str, target_index: int, scorer: str):

from dgym.envs.oracle import DockingOracle

# Create noiseless evaluators
name, receptor_path, config = get_docking_config(path, target_index, scorer=scorer)
Expand All @@ -124,9 +128,6 @@ def get_oracle(path: str, target_index: int, scorer: str):
args = parser.parse_args()

# Run experiment
import pandas as pd
from dgym.molecule import Molecule
from dgym.envs.designer import Designer, Generator

# Load all data
path = '../../../../dgym-data'
Expand Down Expand Up @@ -163,7 +164,6 @@ def get_oracle(path: str, target_index: int, scorer: str):
results_df['scorer'] = args.scorer

# Write to disk
import os

file_path = f'{args.out_dir}/screen_targets_{args.target_index}_{args.scorer}.tsv'
results_df.to_csv(
Expand Down
Loading