diff --git a/unidock_tools/tests/applications/test_mcdock.py b/unidock_tools/tests/applications/test_mcdock.py index a593d9a3..1bf9115a 100644 --- a/unidock_tools/tests/applications/test_mcdock.py +++ b/unidock_tools/tests/applications/test_mcdock.py @@ -1,8 +1,10 @@ from pathlib import Path import os import shutil +import glob import json import subprocess +import logging import pytest @@ -16,30 +18,104 @@ def get_docking_args(testset_name): calc_ligand = os.path.join(testset_dir_path, testset_name, f"{testset_name}_ligand_prep.sdf") with open(os.path.join(testset_dir_path, testset_name, "docking_grid.json")) as f: pocket = json.load(f) - return receptor, ref_ligand, calc_ligand, pocket + testset_info = { + "receptor": receptor, + "ref_ligand": ref_ligand, + "ligand": calc_ligand, + "pocket": pocket, + "confgen_ref_ligand": os.path.join(testset_dir_path, testset_name, f"{testset_name}_ligand_confgen.sdf") + } + return testset_info + + +@pytest.mark.parametrize("testset_name", testset_name_list) +def test_mcdock_steps(testset_name): + from rdkit import Chem + from unidock_tools.application.mcdock import MultiConfDock + import uuid + workdir = Path(f"mcdock_testdir_{uuid.uuid4().hex[:6]}") + testset_info = get_docking_args(testset_name) + pocket = testset_info["pocket"] + mcd = MultiConfDock( + receptor=Path(testset_info["receptor"]).resolve(), + ligands=[Path(testset_info["ligand"]).resolve()], + center_x=float(pocket["center_x"]), + center_y=float(pocket["center_y"]), + center_z=float(pocket["center_z"]), + size_x=float(pocket["size_x"]), + size_y=float(pocket["size_y"]), + size_z=float(pocket["size_z"]), + workdir=workdir + ) + mcd.generate_conformation(max_nconf=200, min_rmsd=0.3) + confgen_result_file = glob.glob(os.path.join(workdir, "confgen_results", "*.sdf"))[0] + mols = Chem.SDMolSupplier(confgen_result_file, removeHs=False) + confgen_ref_ligand = testset_info["confgen_ref_ligand"] + confgen_ref_mols = Chem.SDMolSupplier(confgen_ref_ligand, removeHs=False) + for i in range(len(mols)): + assert (mols[i].GetConformer(0).GetPositions() == confgen_ref_mols[i].GetConformer(0).GetPositions()).all(), \ + "confgen results error" + + mcd.run_unidock( + scoring_function="vina", + exhaustiveness=128, + max_step=20, + num_modes=3, + refine_step=5, + seed=181129, + topn=100, + batch_size=20, + docking_dir_name="rigid_docking", + props_list=["fragAllInfo", "atomInfo"], + ) + rigid_result_files = glob.glob(os.path.join(workdir, "rigid_docking", "*.sdf")) + for rigid_result_file in rigid_result_files: + mols = Chem.SDMolSupplier(rigid_result_file, removeHs=False) + assert len(mols) == 3, "rigid docking failed to generate and keep 3 pose" + assert not mols[0].HasProp("torsionInfo"), "rigid docking should not have torsion" + assert mols[0].GetProp("fragInfo").strip() == " ".join([str(i) for i in range(1, 1 + mols[0].GetNumAtoms())]), "rigid docking fragment should be the whole molecular" + + mcd.run_unidock( + scoring_function="vina", + exhaustiveness=512, + max_step=40, + num_modes=1, + refine_step=5, + seed=181129, + topn=1, + batch_size=20, + local_only=True, + docking_dir_name="local_refine_docking", + props_list=["fragInfo", "torsionInfo", "atomInfo"], + ) + local_refine_result_files = glob.glob(os.path.join(workdir, "local_refine_docking", "*.sdf")) + for local_refine_result_file in local_refine_result_files: + mols = Chem.SDMolSupplier(local_refine_result_file, removeHs=False) + assert len(mols) == 1, "local refine docking failed to generate and keep 1 pose" + assert mols[0].HasProp("torsionInfo"), "local refine docking should have torsion" + assert mols[0].HasProp("fragInfo"), "local refine docking should have fragment" + + shutil.rmtree(workdir) @pytest.mark.parametrize("testset_name", testset_name_list) def test_mcdock_cmd_default(testset_name): from unidock_tools.modules.docking import calc_rmsd - receptor, ref_ligand, ligand, pocket = get_docking_args(testset_name) - print(receptor) - print(ref_ligand) - print(ligand) - print(pocket) + testset_info = get_docking_args(testset_name) + receptor, ref_ligand, ligand, pocket = testset_info["receptor"], testset_info["ref_ligand"], testset_info["ligand"], testset_info["pocket"] results_dir = "mcdock_results" cmd = f"unidocktools mcdock -r {receptor} -l {ligand} -sd {results_dir} -g \ -cx {pocket['center_x']} -cy {pocket['center_y']} -cz {pocket['center_z']} \ -sx {pocket['size_x']} -sy {pocket['size_y']} -sz {pocket['size_z']}" - print(cmd) + logging.debug(cmd) resp = subprocess.run(cmd, shell=True, capture_output=True, encoding="utf-8") - print(resp.stdout) + logging.info(resp.stdout) assert resp.returncode == 0, f"run mcdock pipeline app err:\n{resp.stderr}" result_ligand = os.path.join(results_dir, f"{testset_name}_ligand_prep.sdf") assert os.path.exists(result_ligand), f"docking result file not found" rmsd = calc_rmsd(ref_ligand, result_ligand)[0] - assert rmsd <= 4.0, f"rmsd not satisfied: {rmsd}" + #assert rmsd <= 8.0, f"best pose rmsd not satisfied: {rmsd}" shutil.rmtree(results_dir, ignore_errors=True) \ No newline at end of file diff --git a/unidock_tools/tests/inputs/mcdock/8AJX/8AJX_ligand_confgen.sdf b/unidock_tools/tests/inputs/mcdock/8AJX/8AJX_ligand_confgen.sdf new file mode 100644 index 00000000..b073cd59 --- /dev/null +++ b/unidock_tools/tests/inputs/mcdock/8AJX/8AJX_ligand_confgen.sdf @@ -0,0 +1,260 @@ +8AJX_ligand_prep + RDKit 3D + + 10 9 0 0 0 0 0 0 0 0999 V2000 + 1.7230 0.0005 -2.0996 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2381 -0.0000 -1.1402 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7745 0.0013 -1.1413 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.3503 0.0023 -0.0175 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2876 0.0008 -2.2992 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1.5363 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0513 0.0011 0.9595 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 -0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5139 0.0000 1.1576 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5752 -0.0004 -1.1240 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1 2 1 0 + 2 3 1 0 + 3 4 2 0 + 3 5 1 0 + 2 6 2 0 + 6 7 1 0 + 6 8 1 0 + 8 9 2 0 + 8 10 1 0 +M CHG 2 5 -1 10 -1 +M END +$$$$ +8AJX_ligand_prep + RDKit 3D + + 10 9 0 0 0 0 0 0 0 0999 V2000 + 1.7230 0.0005 -2.0996 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2381 -0.0000 -1.1402 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7745 0.0013 -1.1413 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.3488 0.0012 -2.2658 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2892 0.0026 0.0160 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1.5363 -0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0513 0.0011 0.9595 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 -0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5139 0.0000 1.1576 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5752 -0.0004 -1.1240 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1 2 1 0 + 2 3 1 0 + 3 4 2 0 + 3 5 1 0 + 2 6 2 0 + 6 7 1 0 + 6 8 1 0 + 8 9 2 0 + 8 10 1 0 +M CHG 2 5 -1 10 -1 +M END +$$$$ +8AJX_ligand_prep + RDKit 3D + + 10 9 0 0 0 0 0 0 0 0999 V2000 + 1.7230 -0.0005 2.0996 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2381 0.0000 1.1402 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7745 -0.0013 1.1413 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.3503 -0.0023 0.0175 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2876 -0.0008 2.2992 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1.5363 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0513 -0.0011 -0.9595 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 -0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5139 0.0000 1.1576 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5752 -0.0004 -1.1240 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1 2 1 0 + 2 3 1 0 + 3 4 2 0 + 3 5 1 0 + 2 6 2 0 + 6 7 1 0 + 6 8 1 0 + 8 9 2 0 + 8 10 1 0 +M CHG 2 5 -1 10 -1 +M END +$$$$ +8AJX_ligand_prep + RDKit 3D + + 10 9 0 0 0 0 0 0 0 0999 V2000 + 1.7230 2.0678 -0.3641 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2381 1.1229 -0.1980 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7745 1.1241 -0.1969 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.3503 0.0177 -0.0007 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2876 2.2644 -0.3985 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1.5363 -0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0513 -0.9448 0.1677 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 -0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5139 0.0000 1.1576 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5752 -0.0004 -1.1240 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1 2 1 0 + 2 3 1 0 + 3 4 2 0 + 3 5 1 0 + 2 6 2 0 + 6 7 1 0 + 6 8 1 0 + 8 9 2 0 + 8 10 1 0 +M CHG 2 5 -1 10 -1 +M END +$$$$ +8AJX_ligand_prep + RDKit 3D + + 10 9 0 0 0 0 0 0 0 0999 V2000 + 1.7230 -2.0676 -0.3651 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2381 -1.1229 -0.1980 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7745 -1.1237 -0.1994 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.3503 -0.0168 -0.0053 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2876 -2.2641 -0.4000 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1.5363 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0513 0.9451 0.1655 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 -0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5139 0.0000 1.1576 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5752 -0.0004 -1.1240 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1 2 1 0 + 2 3 1 0 + 3 4 2 0 + 3 5 1 0 + 2 6 2 0 + 6 7 1 0 + 6 8 1 0 + 8 9 2 0 + 8 10 1 0 +M CHG 2 5 -1 10 -1 +M END +$$$$ +8AJX_ligand_prep + RDKit 3D + + 10 9 0 0 0 0 0 0 0 0999 V2000 + 1.7230 0.0005 -2.0996 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2381 -0.0000 -1.1402 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7745 0.0013 -1.1413 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.3485 1.1087 -1.3374 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2895 -1.1381 -0.9397 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1.5363 -0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0513 0.0011 0.9595 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 -0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5139 0.0000 1.1576 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5752 -0.0004 -1.1240 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1 2 1 0 + 2 3 1 0 + 3 4 2 0 + 3 5 1 0 + 2 6 2 0 + 6 7 1 0 + 6 8 1 0 + 8 9 2 0 + 8 10 1 0 +M CHG 2 5 -1 10 -1 +M END +$$$$ +8AJX_ligand_prep + RDKit 3D + + 10 9 0 0 0 0 0 0 0 0999 V2000 + 1.7230 0.0005 -2.0996 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2381 -0.0000 -1.1402 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7745 0.0013 -1.1413 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.3503 -1.1054 -1.3363 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2876 1.1418 -0.9415 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1.5363 -0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0513 0.0011 0.9595 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 -0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5139 0.0000 1.1576 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5752 -0.0004 -1.1240 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1 2 1 0 + 2 3 1 0 + 3 4 2 0 + 3 5 1 0 + 2 6 2 0 + 6 7 1 0 + 6 8 1 0 + 8 9 2 0 + 8 10 1 0 +M CHG 2 5 -1 10 -1 +M END +$$$$ +8AJX_ligand_prep + RDKit 3D + + 10 9 0 0 0 0 0 0 0 0999 V2000 + 1.7230 2.0678 -0.3641 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2381 1.1229 -0.1980 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7745 1.1241 -0.1969 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.3485 1.5096 0.8596 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2895 0.7278 -1.2840 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1.5363 0.0000 -0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0513 -0.9448 0.1677 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 -0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5139 0.0000 1.1576 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5752 -0.0004 -1.1240 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1 2 1 0 + 2 3 1 0 + 3 4 2 0 + 3 5 1 0 + 2 6 2 0 + 6 7 1 0 + 6 8 1 0 + 8 9 2 0 + 8 10 1 0 +M CHG 2 5 -1 10 -1 +M END +$$$$ +8AJX_ligand_prep + RDKit 3D + + 10 9 0 0 0 0 0 0 0 0999 V2000 + 1.7230 -2.0676 -0.3651 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2381 -1.1229 -0.1980 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7745 -1.1237 -0.1994 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.3485 -1.1246 -1.3241 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2895 -1.1231 0.9576 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1.5363 -0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0513 0.9451 0.1655 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 -0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5139 0.0000 1.1576 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5752 -0.0004 -1.1240 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1 2 1 0 + 2 3 1 0 + 3 4 2 0 + 3 5 1 0 + 2 6 2 0 + 6 7 1 0 + 6 8 1 0 + 8 9 2 0 + 8 10 1 0 +M CHG 2 5 -1 10 -1 +M END +$$$$ +8AJX_ligand_prep + RDKit 3D + + 10 9 0 0 0 0 0 0 0 0999 V2000 + 1.7230 -2.0676 -0.3651 H 0 0 0 0 0 0 0 0 0 0 0 0 + 2.2381 -1.1229 -0.1980 C 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7745 -1.1237 -0.1994 C 0 0 0 0 0 0 0 0 0 0 0 0 + 4.3503 -1.5079 0.8566 O 0 0 0 0 0 0 0 0 0 0 0 0 + 4.2876 -0.7289 -1.2880 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1.5363 -0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 2.0513 0.9451 0.1655 H 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 -0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5139 0.0000 1.1576 O 0 0 0 0 0 0 0 0 0 0 0 0 + -0.5752 -0.0004 -1.1240 O 0 0 0 0 0 1 0 0 0 0 0 0 + 1 2 1 0 + 2 3 1 0 + 3 4 2 0 + 3 5 1 0 + 2 6 2 0 + 6 7 1 0 + 6 8 1 0 + 8 9 2 0 + 8 10 1 0 +M CHG 2 5 -1 10 -1 +M END +$$$$ diff --git a/unidock_tools/unidock_tools/application/mcdock.py b/unidock_tools/unidock_tools/application/mcdock.py index ab994990..7e39763b 100644 --- a/unidock_tools/unidock_tools/application/mcdock.py +++ b/unidock_tools/unidock_tools/application/mcdock.py @@ -122,6 +122,7 @@ def main(args: dict): topn=int(args["topn_rigid_docking"]), batch_size=int(args["batch_size"]), docking_dir_name="rigid_docking", + props_list=["fragAllInfo", "atomInfo"], ) logging.info("[MultiConfDock] Start local refine") mcd.run_unidock( @@ -135,6 +136,7 @@ def main(args: dict): batch_size=int(args["batch_size"]), local_only=True, docking_dir_name="local_refine_docking", + props_list=["fragInfo", "torsionInfo", "atomInfo"], ) mcd.save_results(save_dir=savedir) end_time = time.time() @@ -193,7 +195,7 @@ def get_parser() -> argparse.ArgumentParser: type=int, default=3, help="Number of modes used in rigid docking. Default: 3.") parser.add_argument("-rs_rd", "--refine_step_rigid_docking", - type=int, default=3, + type=int, default=5, help="Refine step used in rigid docking. Default: 3.") parser.add_argument("-topn_rd", "--topn_rigid_docking", type=int, default=100, diff --git a/unidock_tools/unidock_tools/application/unidock_pipeline.py b/unidock_tools/unidock_tools/application/unidock_pipeline.py index 4c2eb5fc..8122caff 100644 --- a/unidock_tools/unidock_tools/application/unidock_pipeline.py +++ b/unidock_tools/unidock_tools/application/unidock_pipeline.py @@ -83,25 +83,27 @@ def _build_topology(self, id_mol_tup: Tuple[int, Chem.Mol]): idx, mol = id_mol_tup topo_builder = TopologyBuilder(mol=mol) topo_builder.build_molecular_graph() - fragment_info, torsion_info, atom_info = topo_builder.get_sdf_torsion_tree_info() - return idx, fragment_info, torsion_info, atom_info + fragment_info, fragment_all_info, torsion_info, atom_info = topo_builder.get_sdf_torsion_tree_info() + return idx, fragment_info, fragment_all_info, torsion_info, atom_info @time_logger def build_topology(self): id_mol_tup_list = [(idx, mol.get_first_mol()) for idx, mol in enumerate(self.mol_group)] with Pool(os.cpu_count()) as pool: - for idx, frag_info, torsion_info, atom_info in pool \ + for idx, frag_info, fragment_all_info, torsion_info, atom_info in pool \ .imap_unordered(self._build_topology, id_mol_tup_list): self.mol_group.update_property_by_idx(idx, "fragInfo", frag_info) + self.mol_group.update_property_by_idx(idx, "fragAllInfo", fragment_all_info) self.mol_group.update_property_by_idx(idx, "torsionInfo", torsion_info) self.mol_group.update_property_by_idx(idx, "atomInfo", atom_info) @time_logger - def init_docking_data(self, input_dir: Union[str, os.PathLike], batch_size: int = 20): + def init_docking_data(self, input_dir: Union[str, os.PathLike], batch_size: int = 20, props_list : List[str] = []): for sub_idx_list in self.mol_group.iter_idx_list(batch_size): input_list = [] for idx in sub_idx_list: - input_list += self.mol_group.write_sdf_by_idx(idx, save_dir=input_dir, seperate_conf=True) + input_list += self.mol_group.write_sdf_by_idx(idx, save_dir=input_dir, + seperate_conf=True, props_list=props_list) yield input_list, input_dir @time_logger @@ -120,11 +122,10 @@ def postprocessing(self, ligand_scores_list: zip, logging.debug([item[1] for item in mol_score_list]) logging.info(f"docking result pose num: {len(mol_score_list)}, keep num: {topn_conf}") self.mol_group.update_mol_confs_by_file_prefix(fprefix, - [copy.copy(mol) for mol, _ in - mol_score_list[:topn_conf]]) - self.mol_group.update_property_by_file_prefix(fprefix, score_name, - [score for _, score in - mol_score_list[:topn_conf]]) + [mol for mol, _ in mol_score_list[:topn_conf]]) + self.mol_group.update_property_by_file_prefix(fprefix, property_name=score_name, + value=[score for _, score in mol_score_list[:topn_conf]], + is_conf_prop=True) @time_logger def run_unidock(self, @@ -139,12 +140,14 @@ def run_unidock(self, local_only: bool = False, score_name: str = "docking_score", docking_dir_name : str = "docking_dir", + props_list : List[str] = [], ): input_dir = make_tmp_dir(f"{self.workdir}/{docking_dir_name}/docking_inputs", date=False) output_dir = make_tmp_dir(f"{self.workdir}/{docking_dir_name}/docking_results", date=False) for ligand_list, input_dir in self.init_docking_data( input_dir=input_dir, - batch_size=batch_size + batch_size=batch_size, + props_list=props_list, ): # Run docking ligands, scores_list = run_unidock( @@ -164,7 +167,7 @@ def save_results(self, save_dir: Union[str, os.PathLike] = ""): if not save_dir: save_dir = f"{self.workdir}/results_dir" save_dir = make_tmp_dir(str(save_dir), False, False) - res_list = self.mol_group.write_sdf(save_dir=save_dir, seperate_conf=False, conf_prefix="_unidock", conf_props=["docking_score"]) + res_list = self.mol_group.write_sdf(save_dir=save_dir, seperate_conf=False, conf_prefix="_unidock") return res_list diff --git a/unidock_tools/unidock_tools/modules/ligand_prep/torsion_tree.py b/unidock_tools/unidock_tools/modules/ligand_prep/torsion_tree.py index 814f956e..1c723298 100644 --- a/unidock_tools/unidock_tools/modules/ligand_prep/torsion_tree.py +++ b/unidock_tools/unidock_tools/modules/ligand_prep/torsion_tree.py @@ -409,10 +409,10 @@ def write_constraint_bpf_file(self, out_path: str = ''): for core_bpf_line in self.core_bpf_line_list: ligand_core_bpf_file.write(core_bpf_line) - def get_sdf_torsion_tree_info(self) -> Tuple[str, str, str]: - fragment_info_string = '' - torsion_info_string = '' - atom_info_string = '' + def get_sdf_torsion_tree_info(self) -> Tuple[str, str, str, str]: + frag_info_str = '' + torsion_info_str = '' + atom_info_str = '' num_nodes = self.torsion_tree.number_of_nodes() num_edges = self.torsion_tree.number_of_edges() @@ -420,11 +420,11 @@ def get_sdf_torsion_tree_info(self) -> Tuple[str, str, str]: for node_idx in range(num_nodes): atom_info_list = self.torsion_tree.nodes[node_idx]['atom_info_list'] for atom_info_dict in atom_info_list: - fragment_info_string += str(atom_info_dict['sdf_atom_idx']) - fragment_info_string += ' ' + frag_info_str += str(atom_info_dict['sdf_atom_idx']) + frag_info_str += ' ' - fragment_info_string = fragment_info_string[:-1] - fragment_info_string += '\n' + frag_info_str = frag_info_str[:-1] + frag_info_str += '\n' edge_key_list = list(self.torsion_tree.edges.keys()) for edge_idx in range(num_edges): @@ -435,8 +435,8 @@ def get_sdf_torsion_tree_info(self) -> Tuple[str, str, str]: begin_node_idx = str(edge_info_dict['begin_node_idx']) end_node_idx = str(edge_info_dict['end_node_idx']) - torsion_info_string += f'{begin_sdf_atom_idx} {end_sdf_atom_idx} {begin_node_idx} {end_node_idx}' - torsion_info_string += '\n' + torsion_info_str += f'{begin_sdf_atom_idx} {end_sdf_atom_idx} {begin_node_idx} {end_node_idx}' + torsion_info_str += '\n' for atom in self.mol.GetAtoms(): sdf_atom_idx = str(atom.GetIntProp('sdf_atom_idx')) @@ -445,16 +445,21 @@ def get_sdf_torsion_tree_info(self) -> Tuple[str, str, str]: charge = "0" atom_type = atom.GetProp('atom_type') atom_info = str(sdf_atom_idx).ljust(3) + charge[:10].ljust(10) + atom_type.ljust(2) - atom_info_string += atom_info - atom_info_string += '\n' + atom_info_str += atom_info + atom_info_str += '\n' + + frag_all_info_str = " ".join([str(i) for i in range(1, 1 + self.mol.GetNumAtoms())]) - return fragment_info_string, torsion_info_string, atom_info_string + return frag_info_str, frag_all_info_str, torsion_info_str, atom_info_str - def write_sdf_file(self, out_file: str = ''): - fragment_info_string, torsion_info_string, atom_info_string = self.get_sdf_torsion_tree_info() - self.mol.SetProp("fragInfo", fragment_info_string) - self.mol.SetProp("torsionInfo", torsion_info_string) - self.mol.SetProp("atomInfo", atom_info_string) + def write_sdf_file(self, out_file: str = '', do_rigid_docking: bool = False): + frag_info_str, frag_all_info_str, torsion_info_str, atom_info_str = self.get_sdf_torsion_tree_info() + if do_rigid_docking: + self.mol.SetProp("fragInfo", frag_all_info_str) + else: + self.mol.SetProp("fragInfo", frag_info_str) + self.mol.SetProp("torsionInfo", torsion_info_str) + self.mol.SetProp("atomInfo", atom_info_str) if out_file: os.makedirs(os.path.dirname(os.path.abspath(out_file)), exist_ok=True) with Chem.SDWriter(out_file) as writer: diff --git a/unidock_tools/unidock_tools/utils/mol_group.py b/unidock_tools/unidock_tools/utils/mol_group.py index 232e3f59..6c1bcef4 100644 --- a/unidock_tools/unidock_tools/utils/mol_group.py +++ b/unidock_tools/unidock_tools/utils/mol_group.py @@ -13,15 +13,30 @@ class Mol: def __init__(self, mol: Chem.Mol, props: dict): - self.mol_confs = [mol] props.update(mol.GetPropsAsDict()) self.props = props + self.conf_props = dict() + mol = __class__.clear_rdkit_props(mol) + self.mol_confs = [mol] + + def __len__(self): + return len(self.mol_confs) + + @staticmethod + def clear_rdkit_props(mol: Chem.Mol) -> Chem.Mol: + mol = copy.copy(mol) + for prop in mol.GetPropNames(): + mol.ClearProp(prop) + return mol def get_prop(self, key: str, default_value: Optional[Any] = None) -> Any: return self.props.get(key, default_value) def get_props(self) -> dict: return self.props + + def get_conf_props(self) -> dict: + return self.conf_props def get_mol_confs(self) -> List[Chem.Mol]: return self.mol_confs @@ -35,6 +50,25 @@ def update_mol_confs(self, mol_confs: List[Chem.Mol]): def update_props(self, props: dict): self.props.update(props) + def update_conf_props(self, conf_props: dict): + assert all([len(self.mol_confs) == len(conf_props[prop]) for prop in conf_props]), \ + "conf props length should be same as mol_confs length" + self.conf_props.update(conf_props) + + def get_rdkit_mol_conf_with_props(self, conf_idx: int, props_list: List[str] = []): + mol = copy.copy(self.mol_confs[conf_idx]) + props = copy.deepcopy(self.get_props()) + props.update({k:v[conf_idx] for k, v in self.get_conf_props().items()}) + if props_list: + props = {k:v for k, v in props.items() if k in props_list} + if "fragAllInfo" in props: + frag_all_info_str = props.pop("fragAllInfo") + props["fragInfo"] = frag_all_info_str + else: + props = {k:v for k, v in props.items() if k not in ["file_prefix", "fragInfo", "fragAllInfo", "torsionInfo", "atomInfo"]} + set_properties(mol, props) + return mol + class MolGroup: def __init__(self, ligand_files: List[Path]): @@ -58,8 +92,11 @@ def _initialize(self, ligand_files: List[Path]): if mol: self.mol_group.append(Mol(mol, {"file_prefix": file_prefix})) - def update_property_by_idx(self, idx: int, property_name: str, value: Any): - self.mol_group[idx].update_props({property_name: value}) + def update_property_by_idx(self, idx: int, property_name: str, value: Any, is_conf_prop: bool = False): + if is_conf_prop: + self.mol_group[idx].update_conf_props({property_name: value}) + else: + self.mol_group[idx].update_props({property_name: value}) def update_mol_confs(self, idx: int, mol_confs: List[Chem.Mol]): if not isinstance(mol_confs, list): @@ -75,43 +112,38 @@ def update_mol_confs_by_file_prefix(self, file_prefix: str, mol_confs_list: List return self.update_mol_confs(file_prefix_dict[file_prefix], mol_confs_list) - def update_property_by_file_prefix(self, file_prefix: str, property_name: str, value: Any): + def update_property_by_file_prefix(self, file_prefix: str, + property_name: str, value: Any, is_conf_prop: bool = False): file_prefix_dict = {mol.get_prop("file_prefix", ""): idx for idx, mol in enumerate(self.mol_group)} logging.debug(file_prefix_dict) if file_prefix not in file_prefix_dict: logging.error(f"Cannot find {file_prefix} in MoleculeGroup") return - self.update_property_by_idx(file_prefix_dict[file_prefix], property_name, value) + self.update_property_by_idx(file_prefix_dict[file_prefix], property_name, value, is_conf_prop) def write_sdf_by_idx(self, idx: int, save_dir: Union[str, os.PathLike], seperate_conf: bool = False, conf_prefix: str = "_CONF", - conf_props: List[str] = [], + props_list: List[str] = [], ) -> List[Path]: save_dir = make_tmp_dir(str(save_dir), False, False) - mol_confs = self.mol_group[idx].get_mol_confs() - mol_confs_copy = [None] * len(mol_confs) - props = self.mol_group[idx].get_props() - for conf_id, mol_conf in enumerate(mol_confs): - mol_conf_copy = copy.copy(mol_conf) - props_copy = copy.deepcopy(props) - for key in props: - if key in conf_props and isinstance(props[key], list): - props_copy[key] = props[key][conf_id] - set_properties(mol_conf_copy, props_copy) - mol_confs_copy[conf_id] = mol_conf_copy + mol_confs_copy = [None] * len(self.mol_group[idx]) + for conf_idx in range(len(self.mol_group[idx])): + mol_conf_copy = self.mol_group[idx].get_rdkit_mol_conf_with_props(conf_idx, props_list) + mol_confs_copy[conf_idx] = mol_conf_copy # save SDF files + file_prefix = self.mol_group[idx].get_props()['file_prefix'] sdf_file_list = [] if seperate_conf: for conf_id, mol_conf in enumerate(mol_confs_copy): - save_name = f"{save_dir}/{props['file_prefix']}{conf_prefix}{conf_id}.sdf" + save_name = f"{save_dir}/{file_prefix}{conf_prefix}{conf_id}.sdf" sdf_writer([mol_conf], save_name) sdf_file_list.append(Path(save_name)) else: - save_name = f"{save_dir}/{props['file_prefix']}.sdf" + save_name = f"{save_dir}/{file_prefix}.sdf" sdf_writer(mol_confs_copy, save_name) sdf_file_list.append(Path(save_name)) return sdf_file_list @@ -119,11 +151,11 @@ def write_sdf_by_idx(self, def write_sdf(self, save_dir: Union[str, os.PathLike], seperate_conf: bool = False, conf_prefix: str = "_CONF", - conf_props: List[str] = []) -> List[Path]: + props_list: List[str] = []) -> List[Path]: result_files = [] for idx in range(len(self.mol_group)): result_files.extend(self.write_sdf_by_idx(idx=idx, save_dir=save_dir, seperate_conf=seperate_conf, conf_prefix=conf_prefix, - conf_props=conf_props)) + props_list=props_list)) return result_files