Skip to content

Commit

Permalink
Merge pull request #26 from radifar/smarts-filter-charge
Browse files Browse the repository at this point in the history
feat: add filter charge
  • Loading branch information
radifar authored Dec 18, 2023
2 parents d5f7e42 + 8404af4 commit 869d6a1
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 1 deletion.
43 changes: 43 additions & 0 deletions src/deemian/chem/interactions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from rdkit.Chem import AllChem as Chem

from deemian.chem.utility import flatten_atom_list


def filter_charge(mol: Chem.rdchem.Mol, mode: str) -> list[int]:
apparent_cation = ["[$([*+1,*+2,*+3]);!$([N+]-[O-])]", "[NX3&!$([NX3]-O)]-[C]=[NX3+]"]
apparent_anion = ["O=[C,S,N,P]-[O-]", "[*-1,*-2]"]
apparent_cation = [Chem.MolFromSmarts(p) for p in apparent_cation] # p for pattern
apparent_anion = [Chem.MolFromSmarts(p) for p in apparent_anion]

potential_cation = [
"[$([N;H2&+0][C;!$(C=*)]);!$(N[a])]",
"[$([N;H1&+0]([C;!$(C=*)])[C;!$(C=*)]);!$(N[a])]",
"[$([N;H0&+0]([C;!$(C=*)])([C;!$(C=*)])[C;!$(C=*)]);!$(N[a])]",
"NC(=N)",
"[n;R1]1[c;R1][n;R1][c;R1][c;R1]1",
]
potential_anion = ["O=[C,S,N,P]-[OH,O-]"]
potential_cation = [Chem.MolFromSmarts(p) for p in potential_cation]
potential_anion = [Chem.MolFromSmarts(p) for p in potential_anion]

all_cation = apparent_cation + potential_cation
all_anion = apparent_anion + potential_anion

filter_dictionary = dict(
apparent_cation=apparent_cation,
apparent_anion=apparent_anion,
potential_cation=potential_cation,
potential_anion=potential_anion,
all_cation=all_cation,
all_anion=all_anion,
)

combined_match = []
try:
for pattern in filter_dictionary[mode]:
matched = flatten_atom_list(mol.GetSubstructMatches(pattern))
combined_match.append(matched)
except KeyError:
raise KeyError(f"Error: filtering mode {mode} is not recognized")

return flatten_atom_list(combined_match)
6 changes: 6 additions & 0 deletions src/deemian/chem/utility.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
from itertools import chain

import pandas as pd


def flatten_atom_list(nested_list_of_atoms):
return list(set(chain.from_iterable(nested_list_of_atoms)))


def dataframe_to_pdb_block(df: pd.DataFrame) -> str:
df_noh = df[df["atom_symbol"] != "H"]
# from https://stackoverflow.com/questions/35491274/split-a-pandas-column-of-lists-into-multiple-columns
Expand Down
62 changes: 62 additions & 0 deletions tests/test_chem_interactions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import pytest
from rdkit.Chem import AllChem as Chem

from deemian.chem.interactions import filter_charge


@pytest.mark.parametrize(
"smiles, n_cation, n_anion, n_ionizable_cation, n_ionizable_anion, all_cation, all_anion",
[
("CC(=O)NCCCS(O)(=O)=O", 0, 0, 0, 4, 0, 4), # acamprosate
("CC(=O)NCCCS(=O)(=O)[O-]", 0, 4, 0, 4, 0, 4), # acamprosate -
("CCC(CC)O[C@@H]1C=C(C[C@@H]([C@H]1NC(=O)C)N)C(=O)O", 0, 0, 1, 3, 1, 3), # oseltamivir
("CCC(CC)O[C@@H]1C=C(C(=O)O)C[C@H]([NH3+])[C@H]1NC(C)=O", 1, 0, 0, 3, 1, 3), # oseltamivir +
("CCC(CC)O[C@@H]1C=C(C(=O)[O-])C[C@H](N)[C@H]1NC(C)=O", 0, 3, 1, 3, 1, 3), # oseltamivir -
("CCC(CC)O[C@@H]1C=C(C(=O)[O-])C[C@H]([NH3+])[C@H]1NC(C)=O", 1, 3, 0, 3, 1, 3), # oseltamivir +-
("NC[C@@H](F)CP(O)=O", 0, 0, 1, 3, 1, 3), # lesogaberan
("[NH3+]C[C@@H](F)C[PH](=O)O", 1, 0, 0, 3, 1, 3), # lesogaberan +
("NC[C@@H](F)C[PH](=O)[O-]", 0, 3, 1, 3, 1, 3), # lesogaberan -
("[NH3+]C[C@@H](F)C[PH](=O)[O-]", 1, 3, 0, 3, 1, 3), # lesogaberan +-
("CN(C)C(=N)NC(N)=N", 0, 0, 7, 0, 7, 0), # metformin
("CN(C)C(=[NH2+])N=C(N)N", 3, 0, 7, 0, 7, 0), # metformin +
("N[C@@H](CCCNC(=N)N[N+]([O-])=O)C(O)=O", 0, 3, 5, 6, 5, 6), # nitroarginine
("NC(=N[N+](=O)[O-])NCCC[C@H]([NH3+])C(=O)O", 1, 3, 4, 6, 5, 6), # nitroarginine +
("NC(=N[N+](=O)[O-])NCCC[C@H](N)C(=O)[O-]", 0, 6, 5, 6, 5, 6), # nitroarginine -
("NC(=N[N+](=O)[O-])NCCC[C@H]([NH3+])C(=O)[O-]", 1, 6, 4, 6, 5, 6), # nitroarginine +-
(
"N=C(N)NCCC[C@H](NC(=O)[C@@H](N)CCCCN)C(=O)N[C@@H](CC(=O)O)C(=O)N[C@@H](CCC(=O)O)C(=O)N[C@@H](Cc1c[nH]cn1)C(=O)O", # noqa: E501
0,
0,
11,
9,
11,
9,
), # pentapeptide KRDEH generated with RDKit
],
)
def test_filter_charge(smiles, n_cation, n_anion, n_ionizable_cation, n_ionizable_anion, all_cation, all_anion):
"""
test SMILES retrieved from Drugbank, and the charged version generated using SwissParam
with MMFF+Match parameter. Except Oseltamivir SMILES which retrieved from PDB with ID: G39.
"""
mol = Chem.MolFromSmiles(smiles)
n_cation_result = len(filter_charge(mol, "apparent_cation"))
n_anion_result = len(filter_charge(mol, "apparent_anion"))
n_ionizable_cation_result = len(filter_charge(mol, "potential_cation"))
n_ionizable_anion_result = len(filter_charge(mol, "potential_anion"))
all_cation_result = len(filter_charge(mol, "all_cation"))
all_anion_result = len(filter_charge(mol, "all_anion"))

assert n_cation == n_cation_result
assert n_anion == n_anion_result
assert n_ionizable_cation == n_ionizable_cation_result
assert n_ionizable_anion == n_ionizable_anion_result
assert all_cation == all_cation_result
assert all_anion == all_anion_result


def test_filter_charge_keyerror():
with pytest.raises(KeyError) as e_info:
mol = Chem.MolFromSmiles("CC(=O)NCCCS(O)(=O)=O")
filter_charge(mol, "cation")
assert str(e_info.value) == "'Error: filtering mode cation is not recognized'"
11 changes: 10 additions & 1 deletion tests/test_chem_utility.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
import pandas as pd
from rdkit.Chem import AllChem as Chem

from deemian.chem.utility import dataframe_to_pdb_block
from deemian.chem.utility import dataframe_to_pdb_block, flatten_atom_list


def test_flatten_atom_list():
test_list = [[1, 2, 3], [4, 5, 6], [1, 10, 12]]
expected_list = [1, 2, 3, 4, 5, 6, 10, 12]

result_list = flatten_atom_list(test_list)

assert result_list == expected_list


def test_dataframe_to_pdb_block():
Expand Down

0 comments on commit 869d6a1

Please sign in to comment.