-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #26 from radifar/smarts-filter-charge
feat: add filter charge
- Loading branch information
Showing
4 changed files
with
121 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from rdkit.Chem import AllChem as Chem | ||
|
||
from deemian.chem.utility import flatten_atom_list | ||
|
||
|
||
def filter_charge(mol: Chem.rdchem.Mol, mode: str) -> list[int]: | ||
apparent_cation = ["[$([*+1,*+2,*+3]);!$([N+]-[O-])]", "[NX3&!$([NX3]-O)]-[C]=[NX3+]"] | ||
apparent_anion = ["O=[C,S,N,P]-[O-]", "[*-1,*-2]"] | ||
apparent_cation = [Chem.MolFromSmarts(p) for p in apparent_cation] # p for pattern | ||
apparent_anion = [Chem.MolFromSmarts(p) for p in apparent_anion] | ||
|
||
potential_cation = [ | ||
"[$([N;H2&+0][C;!$(C=*)]);!$(N[a])]", | ||
"[$([N;H1&+0]([C;!$(C=*)])[C;!$(C=*)]);!$(N[a])]", | ||
"[$([N;H0&+0]([C;!$(C=*)])([C;!$(C=*)])[C;!$(C=*)]);!$(N[a])]", | ||
"NC(=N)", | ||
"[n;R1]1[c;R1][n;R1][c;R1][c;R1]1", | ||
] | ||
potential_anion = ["O=[C,S,N,P]-[OH,O-]"] | ||
potential_cation = [Chem.MolFromSmarts(p) for p in potential_cation] | ||
potential_anion = [Chem.MolFromSmarts(p) for p in potential_anion] | ||
|
||
all_cation = apparent_cation + potential_cation | ||
all_anion = apparent_anion + potential_anion | ||
|
||
filter_dictionary = dict( | ||
apparent_cation=apparent_cation, | ||
apparent_anion=apparent_anion, | ||
potential_cation=potential_cation, | ||
potential_anion=potential_anion, | ||
all_cation=all_cation, | ||
all_anion=all_anion, | ||
) | ||
|
||
combined_match = [] | ||
try: | ||
for pattern in filter_dictionary[mode]: | ||
matched = flatten_atom_list(mol.GetSubstructMatches(pattern)) | ||
combined_match.append(matched) | ||
except KeyError: | ||
raise KeyError(f"Error: filtering mode {mode} is not recognized") | ||
|
||
return flatten_atom_list(combined_match) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
import pytest | ||
from rdkit.Chem import AllChem as Chem | ||
|
||
from deemian.chem.interactions import filter_charge | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"smiles, n_cation, n_anion, n_ionizable_cation, n_ionizable_anion, all_cation, all_anion", | ||
[ | ||
("CC(=O)NCCCS(O)(=O)=O", 0, 0, 0, 4, 0, 4), # acamprosate | ||
("CC(=O)NCCCS(=O)(=O)[O-]", 0, 4, 0, 4, 0, 4), # acamprosate - | ||
("CCC(CC)O[C@@H]1C=C(C[C@@H]([C@H]1NC(=O)C)N)C(=O)O", 0, 0, 1, 3, 1, 3), # oseltamivir | ||
("CCC(CC)O[C@@H]1C=C(C(=O)O)C[C@H]([NH3+])[C@H]1NC(C)=O", 1, 0, 0, 3, 1, 3), # oseltamivir + | ||
("CCC(CC)O[C@@H]1C=C(C(=O)[O-])C[C@H](N)[C@H]1NC(C)=O", 0, 3, 1, 3, 1, 3), # oseltamivir - | ||
("CCC(CC)O[C@@H]1C=C(C(=O)[O-])C[C@H]([NH3+])[C@H]1NC(C)=O", 1, 3, 0, 3, 1, 3), # oseltamivir +- | ||
("NC[C@@H](F)CP(O)=O", 0, 0, 1, 3, 1, 3), # lesogaberan | ||
("[NH3+]C[C@@H](F)C[PH](=O)O", 1, 0, 0, 3, 1, 3), # lesogaberan + | ||
("NC[C@@H](F)C[PH](=O)[O-]", 0, 3, 1, 3, 1, 3), # lesogaberan - | ||
("[NH3+]C[C@@H](F)C[PH](=O)[O-]", 1, 3, 0, 3, 1, 3), # lesogaberan +- | ||
("CN(C)C(=N)NC(N)=N", 0, 0, 7, 0, 7, 0), # metformin | ||
("CN(C)C(=[NH2+])N=C(N)N", 3, 0, 7, 0, 7, 0), # metformin + | ||
("N[C@@H](CCCNC(=N)N[N+]([O-])=O)C(O)=O", 0, 3, 5, 6, 5, 6), # nitroarginine | ||
("NC(=N[N+](=O)[O-])NCCC[C@H]([NH3+])C(=O)O", 1, 3, 4, 6, 5, 6), # nitroarginine + | ||
("NC(=N[N+](=O)[O-])NCCC[C@H](N)C(=O)[O-]", 0, 6, 5, 6, 5, 6), # nitroarginine - | ||
("NC(=N[N+](=O)[O-])NCCC[C@H]([NH3+])C(=O)[O-]", 1, 6, 4, 6, 5, 6), # nitroarginine +- | ||
( | ||
"N=C(N)NCCC[C@H](NC(=O)[C@@H](N)CCCCN)C(=O)N[C@@H](CC(=O)O)C(=O)N[C@@H](CCC(=O)O)C(=O)N[C@@H](Cc1c[nH]cn1)C(=O)O", # noqa: E501 | ||
0, | ||
0, | ||
11, | ||
9, | ||
11, | ||
9, | ||
), # pentapeptide KRDEH generated with RDKit | ||
], | ||
) | ||
def test_filter_charge(smiles, n_cation, n_anion, n_ionizable_cation, n_ionizable_anion, all_cation, all_anion): | ||
""" | ||
test SMILES retrieved from Drugbank, and the charged version generated using SwissParam | ||
with MMFF+Match parameter. Except Oseltamivir SMILES which retrieved from PDB with ID: G39. | ||
""" | ||
mol = Chem.MolFromSmiles(smiles) | ||
n_cation_result = len(filter_charge(mol, "apparent_cation")) | ||
n_anion_result = len(filter_charge(mol, "apparent_anion")) | ||
n_ionizable_cation_result = len(filter_charge(mol, "potential_cation")) | ||
n_ionizable_anion_result = len(filter_charge(mol, "potential_anion")) | ||
all_cation_result = len(filter_charge(mol, "all_cation")) | ||
all_anion_result = len(filter_charge(mol, "all_anion")) | ||
|
||
assert n_cation == n_cation_result | ||
assert n_anion == n_anion_result | ||
assert n_ionizable_cation == n_ionizable_cation_result | ||
assert n_ionizable_anion == n_ionizable_anion_result | ||
assert all_cation == all_cation_result | ||
assert all_anion == all_anion_result | ||
|
||
|
||
def test_filter_charge_keyerror(): | ||
with pytest.raises(KeyError) as e_info: | ||
mol = Chem.MolFromSmiles("CC(=O)NCCCS(O)(=O)=O") | ||
filter_charge(mol, "cation") | ||
assert str(e_info.value) == "'Error: filtering mode cation is not recognized'" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters