Skip to content

Commit

Permalink
changed on-the-fly auto cut strategy to minimize the number of each c…
Browse files Browse the repository at this point in the history
…utting label in product fragments, added tools to automatically generate partial reattachment reactions, added reaction reversibility to yml writer
  • Loading branch information
donerancl committed Jun 4, 2024
1 parent 6a5ebf4 commit b6bb8ae
Show file tree
Hide file tree
Showing 4 changed files with 342 additions and 63 deletions.
150 changes: 93 additions & 57 deletions rmgpy/molecule/fragment.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from rmgpy.molecule.molecule import Atom, Bond, Molecule
from rmgpy.molecule.atomtype import get_atomtype, AtomTypeError, ATOMTYPES, AtomType
from rdkit import Chem

from numpy.random import randint
# this variable is used to name atom IDs so that there are as few conflicts by
# using the entire space of integer objects
ATOM_ID_COUNTER = -(2**15)
Expand Down Expand Up @@ -888,15 +888,10 @@ def cut_molecule(self, output_smiles=False, cut_through=True, size_threshold=Non
frag_list.append(res_frag)
return frag_list

def sliceitup_arom(self, molecule, size_threshold=None):
def sliceitup_arom(self, molecule, size_threshold=5):
"""
Several specified aromatic patterns
"""
# set min size for each aliphatic fragment size
if size_threshold:
size_threshold = size_threshold
else:
size_threshold = 5
# if input is smiles string, output smiles
if isinstance(molecule, str):
molecule_smiles = molecule
Expand Down Expand Up @@ -950,29 +945,52 @@ def sliceitup_arom(self, molecule, size_threshold=None):
# mol_set contains new set of fragments
mol_set = Chem.GetMolFrags(new_mol, asMols=True)
# check all fragments' size
if all(
sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6)
>= size_threshold
for mol in mol_set
):
# replace * at cutting position with cutting label
for ind, rdmol in enumerate(mol_set):
frag = Chem.MolToSmiles(rdmol)
if len(mol_set) > 2: # means it cut into 3 fragments
if frag.count("*") > 1:
# replace both with R
frag_smi = frag.replace("*", "R")
else:
frag_smi = frag.replace("*", "L")
else: # means it only cut once, generate 2 fragments
if ind == 0:
frag_smi = frag.replace("*", "R")
try:
if all(sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) >= size_threshold for mol in mol_set):
if len(mol_set) == 2:
frag1 = Chem.MolToSmiles(mol_set[0])
frag2 = Chem.MolToSmiles(mol_set[1])

frag1_R = frag1.count("Na")
frag1_L = frag1.count("K")
frag2_R = frag2.count("Na")
frag2_L = frag2.count("K")

if frag1_R > frag2_R and frag1_L <= frag2_L:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")
elif frag1_L > frag2_L and frag1_R <= frag2_R:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")
elif frag2_L > frag1_L and frag2_R <= frag1_R:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")
elif frag2_R > frag1_R and frag2_L <= frag1_L:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")
elif randint(0,1)==1:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")
else:
frag_smi = frag.replace("*", "L")
frag_list.append(frag_smi)
break
else:
# turn to next matched_atom_map
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")

frag_list = [frag1_smi, frag2_smi]

elif len(mol_set) > 2: # means it cut into 3 fragments
frag_list = []
for ind, rdmol in enumerate(mol_set):
frag = Chem.MolToSmiles(rdmol)
if frag.count("*") > 1:
frag_smi = frag.replace("*", "R")
else:
frag_smi = frag.replace("*", "L")
frag_list.append(frag_smi)
break
else:
# turn to next matched_atom_map
continue
except:
continue
else:
# no match for this pattern
Expand Down Expand Up @@ -1014,15 +1032,10 @@ def sliceitup_arom(self, molecule, size_threshold=None):
frag_list_new.append(res_frag)
return frag_list_new

def sliceitup_aliph(self, molecule, size_threshold=None):
def sliceitup_aliph(self, molecule, size_threshold=5):
"""
Several specified aliphatic patterns
"""
# set min size for each aliphatic fragment size
if size_threshold:
size_threshold = size_threshold
else:
size_threshold = 5
# if input is smiles string, output smiles
if isinstance(molecule, str):
molecule_smiles = molecule
Expand Down Expand Up @@ -1079,29 +1092,52 @@ def sliceitup_aliph(self, molecule, size_threshold=None):
# mol_set contains new set of fragments
mol_set = Chem.GetMolFrags(new_mol, asMols=True)
# check all fragments' size
if all(
sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6)
>= size_threshold
for mol in mol_set
):
# replace * at cutting position with cutting label
for ind, rdmol in enumerate(mol_set):
frag = Chem.MolToSmiles(rdmol)
if len(mol_set) > 2: # means it cut into 3 fragments
if frag.count("*") > 1:
# replace both with R
frag_smi = frag.replace("*", "R")
try:
if all(sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) >= size_threshold for mol in mol_set):
if len(mol_set) == 2:
frag1 = Chem.MolToSmiles(mol_set[0])
frag2 = Chem.MolToSmiles(mol_set[1])

frag1_R = frag1.count("Na")
frag1_L = frag1.count("K")
frag2_R = frag2.count("Na")
frag2_L = frag2.count("K")

if frag1_R > frag2_R and frag1_L <= frag2_L:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")
elif frag1_L > frag2_L and frag1_R <= frag2_R:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")
elif frag2_L > frag1_L and frag2_R <= frag1_R:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")
elif frag2_R > frag1_R and frag2_L <= frag1_L:
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")
elif randint(0,1)==1:
frag1_smi = frag1.replace("*", "L")
frag2_smi = frag2.replace("*", "R")
else:
frag_smi = frag.replace("*", "L")
else: # means it only cut once, generate 2 fragments
if ind == 0:
frag_smi = frag.replace("*", "R")
else:
frag_smi = frag.replace("*", "L")
frag_list.append(frag_smi)
break
else:
# turn to next matched_atom_map
frag1_smi = frag1.replace("*", "R")
frag2_smi = frag2.replace("*", "L")

frag_list = [frag1_smi, frag2_smi]

elif len(mol_set) > 2: # means it cut into 3 fragments
frag_list = []
for ind, rdmol in enumerate(mol_set):
frag = Chem.MolToSmiles(rdmol)
if frag.count("*") > 1:
frag_smi = frag.replace("*", "R")
else:
frag_smi = frag.replace("*", "L")
frag_list.append(frag_smi)
break
else:
# turn to next matched_atom_map
continue
except:
continue
else:
# no match for this pattern
Expand Down
Loading

0 comments on commit b6bb8ae

Please sign in to comment.