Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: ligandprep skip failed cases #99

Merged
merged 3 commits into from
Mar 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion unidock/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ make clang-format
### Using binary

Please download the latest binary of Uni-Dock at the assets tab of [the Release page](https://github.com/dptech-corp/Uni-Dock/releases).
Executable `unidock` supports vina and vinardo scoring functions, and `unidock_ad4` supports ad4 scoring function.
Use the `unidock` executable to run docking.

After downloading, please make sure that the path to `unidock` is in your `PATH` environment variable.

Expand All @@ -69,7 +69,11 @@ After downloading, please make sure that the path to `unidock` is in your `PATH`
To launch a Uni-Dock job, the most important parameters are as follows:

- `--receptor`: filepath of the receptor (PDBQT)

(To use the `ad4` scoring function, generate affinity maps first and pass `--maps <mapdir/receptor_prefix>` instead of `--receptor`. See [the AutoDock Vina documentation](https://autodock-vina.readthedocs.io/en/latest/docking_basic.html#optional-generating-affinity-maps-for-autodock-ff) for how to generate the maps.)

- `--gpu_batch`: file paths of the ligands to dock on the GPU (PDBQT); multiple files can be given at once, separated by spaces (" ")

- `--search_mode`: computational complexity; choose from \[*`fast`*, *`balance`*, or *`detail`*].

***Advanced options***
Expand Down
4 changes: 2 additions & 2 deletions unidock_tools/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,6 @@ pythonVersion = "3.11"
pythonPlatform = "Linux"
reportMissingImports = false
reportMissingModuleSource = false
reportMissingTypeStubs = false
reportAttributeAccessIssue = false
reportIncompatibleMethodOverride = false
reportIncompatibleMethodOverride = false
reportCallIssue = false
43 changes: 30 additions & 13 deletions unidock_tools/src/unidock_tools/application/ligandprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,50 @@
import os
from functools import partial
from multiprocessing import Pool
import traceback
import logging
import argparse
from rdkit import Chem

from unidock_tools.utils import read_ligand
from unidock_tools.modules.ligand_prep import TopologyBuilder


def iter_ligands(ligands: List[Path], batch_size: int = 1200,
                 use_file_name: bool = False) -> Generator[List[Tuple[Chem.Mol, str]], None, None]:
    """Yield ``(mol, name)`` pairs read from ligand files, grouped in batches.

    Each file is loaded with ``read_ligand``. Entries that evaluate as false
    (molecules that could not be read) are logged with a warning and skipped,
    so one bad record does not stop the remaining ligands.

    Args:
        ligands: Paths of the ligand files to read.
        batch_size: Maximum number of ``(mol, name)`` pairs per yielded batch.
        use_file_name: If true, always derive the name from the file stem
            instead of the molecule's ``_Name`` property.

    Yields:
        Lists of ``(mol, name)`` tuples holding at most ``batch_size`` items;
        the final batch may be shorter.
    """
    curr_mol_name_list = []
    for ligand in ligands:
        mols = read_ligand(ligand)
        for i, mol in enumerate(mols):
            if not mol:
                logging.warning(f"read ligand file {ligand.stem} ind {i} error")
                continue
            if not use_file_name and mol.HasProp("_Name") and mol.GetProp("_Name").strip():
                name = mol.GetProp("_Name").strip()
            else:
                # Disambiguate multi-molecule files by the record index.
                name = f"{ligand.stem}_{i}" if len(mols) > 1 else ligand.stem
            curr_mol_name_list.append((mol, name))
            # ">=" so a batch holds exactly batch_size items; the previous ">"
            # let every batch grow to batch_size + 1.
            if len(curr_mol_name_list) >= batch_size:
                yield curr_mol_name_list
                curr_mol_name_list = []
    # Flush whatever is left over after the last full batch.
    if curr_mol_name_list:
        yield curr_mol_name_list


def ligprep(mol_name_tup: Tuple[Chem.Mol, str], savedir: Path, save_format: str = "sdf"):
    """Prepare one ligand and write its topology file into ``savedir``.

    Builds the molecular graph with ``TopologyBuilder`` and writes the result
    as ``<name>.sdf`` or ``<name>.pdbqt``. Any failure is logged together with
    its traceback and then swallowed, so a single bad ligand does not abort
    the batch this function is mapped over.

    Args:
        mol_name_tup: ``(mol, name)`` pair; ``name`` becomes the file stem.
        savedir: Directory the result file is written to.
        save_format: Output format, ``"sdf"`` (default) or ``"pdbqt"``.
    """
    mol, name = mol_name_tup
    # Reject an unsupported format up front instead of building the whole
    # topology first and only then discovering there is nothing to write.
    if save_format not in ("sdf", "pdbqt"):
        logging.error(f"Invalid save format: {save_format}")
        return
    try:
        tb = TopologyBuilder(mol)
        tb.build_molecular_graph()
        out_file = os.path.join(savedir, f"{name}.{save_format}")
        if save_format == "sdf":
            tb.write_sdf_file(out_file)
        else:
            tb.write_pdbqt_file(out_file)
    except Exception:
        # "except Exception" rather than a bare "except": KeyboardInterrupt and
        # SystemExit must still propagate so worker processes can be stopped.
        logging.error(f"ligprep failed for {name}: {traceback.format_exc()}")


def main(args: dict):
Expand All @@ -52,9 +65,11 @@ def main(args: dict):
continue
ligands.append(Path(line.strip()).resolve())

os.makedirs(Path(args["savedir"]).resolve(), exist_ok=True)
for mol_name_tup_list in iter_ligands(ligands, args["batch_size"], args["use_file_name"]):
with Pool(os.cpu_count()) as pool:
pool.map(partial(ligprep, savedir=args["savedir"]), mol_name_tup_list)
pool.map(partial(ligprep, savedir=args["savedir"],
save_format=args["save_format"]), mol_name_tup_list)


def get_parser() -> argparse.ArgumentParser:
Expand All @@ -65,6 +80,8 @@ def get_parser() -> argparse.ArgumentParser:
help="A text file containing the path of ligand files in sdf format.")
parser.add_argument("-sd", "--savedir", type=str, default="ligprep_results",
help="Save directory. Default: 'MultiConfDock-Result'.")
parser.add_argument("-sf", "--save_format", type=str, default="sdf",
help="Ligprep result files format. Choose from ['sdf', 'pdbqt']. Default: 'sdf'.")
parser.add_argument("-bs", "--batch_size", type=int, default=1200,
help="Batch size for docking. Default: 1200.")
parser.add_argument("-ufn", "--use_file_name", action="store_true",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ def main(args: dict):
batch_size=int(args["batch_size"]),
score_only=bool(args["score_only"]),
local_only=bool(args["local_only"]),
props_list=["fragInfo", "torsionInfo", "atomInfo"],
)
runner.save_results(save_dir=savedir)
end_time = time.time()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, Tuple
from typing import Dict, Tuple, Union
import os
import numpy as np
import networkx as nx
Expand Down Expand Up @@ -163,7 +163,7 @@ def build_molecular_graph(self):
node_idx = 0
root_fragment = splitted_mol_list[root_fragment_idx]
num_root_atoms = root_fragment.GetNumAtoms()
atom_info_list = [None] * num_root_atoms
atom_info_list = [dict()] * num_root_atoms
for root_atom_idx in range(num_root_atoms):
root_atom = root_fragment.GetAtomWithIdx(root_atom_idx)
atom_info_dict = {}
Expand All @@ -189,7 +189,7 @@ def build_molecular_graph(self):
else:
fragment = splitted_mol_list[fragment_idx]
num_fragment_atoms = fragment.GetNumAtoms()
atom_info_list = [None] * num_fragment_atoms
atom_info_list = [dict()] * num_fragment_atoms
for atom_idx in range(num_fragment_atoms):
atom = fragment.GetAtomWithIdx(atom_idx)
atom_info_dict = {}
Expand Down Expand Up @@ -341,7 +341,7 @@ def _deep_first_search(self, node_idx):
self.pdbqt_atom_line_list.append(
self.pdbqt_end_branch_line_format.format('ENDBRANCH', parent_atom_idx, offspring_atom_idx))

def write_pdbqt_file(self, out_file: str = ''):
def write_pdbqt_file(self, out_file: Union[str, bytes, os.PathLike]):
self.pdbqt_remark_line_list = []
self.pdbqt_atom_line_list = []

Expand Down Expand Up @@ -383,7 +383,7 @@ def write_pdbqt_file(self, out_file: str = ''):
for pdbqt_line in self.pdbqt_line_list:
f.write(pdbqt_line)

def write_constraint_bpf_file(self, out_path: str = ''):
def write_constraint_bpf_file(self, out_path: Union[str, bytes, os.PathLike]):
self.core_bpf_remark_line_list = []
self.core_bpf_atom_line_list = []
self.core_bpf_atom_line_format = '{:8.3f}\t{:8.3f}\t{:8.3f}\t{:6.2f}\t{:6.2f}\t{:3s}\t{:<2s}\n'
Expand Down Expand Up @@ -452,21 +452,13 @@ def get_sdf_torsion_tree_info(self) -> Tuple[str, str, str, str]:

return frag_info_str, frag_all_info_str, torsion_info_str, atom_info_str

def write_sdf_file(self, out_file: str = '', do_rigid_docking: bool = False):
def write_sdf_file(self, out_file: Union[str, bytes, os.PathLike], do_rigid_docking: bool = False):
frag_info_str, frag_all_info_str, torsion_info_str, atom_info_str = self.get_sdf_torsion_tree_info()
if do_rigid_docking:
self.mol.SetProp("fragInfo", frag_all_info_str)
else:
self.mol.SetProp("fragInfo", frag_info_str)
self.mol.SetProp("torsionInfo", torsion_info_str)
self.mol.SetProp("atomInfo", atom_info_str)
if out_file:
os.makedirs(os.path.dirname(os.path.abspath(out_file)), exist_ok=True)
with Chem.SDWriter(out_file) as writer:
writer.write(self.mol)


def generate_topology(mol: Chem.Mol, out_file: str = ''):
topology_builder = TopologyBuilder(mol)
topology_builder.build_molecular_graph()
topology_builder.write_pdbqt_file(out_file=out_file)
with Chem.SDWriter(str(out_file)) as writer:
writer.write(self.mol)
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from math import isnan, isinf
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.rdchem import BondType


def get_pdbqt_atom_lines(mol: Chem.Mol, donors: List[int], acceptors: List[int]):
Expand Down
9 changes: 4 additions & 5 deletions unidock_tools/src/unidock_tools/utils/mol_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def update_conf_props(self, conf_props: dict):
self.conf_props.update(conf_props)

def get_rdkit_mol_conf_with_props(self, conf_idx: int, props_list: List[str] = [],
exclude_props_list: List[str] = []):
exclude_props_list: List[str] = []) -> Chem.Mol:
mol = copy.copy(self.mol_confs[conf_idx])
props = copy.deepcopy(self.get_props())
props.update({k:v[conf_idx] for k, v in self.get_conf_props().items()})
Expand Down Expand Up @@ -135,10 +135,9 @@ def write_sdf_by_idx(self,
) -> List[Path]:
save_dir = make_tmp_dir(str(save_dir), False, False)

mol_confs_copy = [None] * len(self.mol_group[idx])
for conf_idx in range(len(self.mol_group[idx])):
mol_conf_copy = self.mol_group[idx].get_rdkit_mol_conf_with_props(conf_idx, props_list, exclude_props_list)
mol_confs_copy[conf_idx] = mol_conf_copy
mol_confs_copy = [self.mol_group[idx].get_rdkit_mol_conf_with_props(
conf_idx, props_list, exclude_props_list) for conf_idx in range(
len(self.mol_group[idx]))]
# save SDF files
file_prefix = self.mol_group[idx].get_props()['file_prefix']
sdf_file_list = []
Expand Down
4 changes: 2 additions & 2 deletions unidock_tools/src/unidock_tools/utils/rdkit_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ def set_properties(mol: Chem.Mol, properties: dict):
"""
for key, value in properties.items():
try:
if isinstance(key, int):
if isinstance(value, int):
mol.SetIntProp(key, value)
elif isinstance(key, float):
elif isinstance(value, float):
mol.SetDoubleProp(key, value)
else:
mol.SetProp(key, str(value))
Expand Down
Loading