diff --git a/src/deemian/chem/interactions.py b/src/deemian/chem/interactions.py new file mode 100644 index 0000000..7fe1b9b --- /dev/null +++ b/src/deemian/chem/interactions.py @@ -0,0 +1,43 @@ +from rdkit.Chem import AllChem as Chem + +from deemian.chem.utility import flatten_atom_list + + +def filter_charge(mol: Chem.rdchem.Mol, mode: str) -> list[int]: + apparent_cation = ["[$([*+1,*+2,*+3]);!$([N+]-[O-])]", "[NX3&!$([NX3]-O)]-[C]=[NX3+]"] + apparent_anion = ["O=[C,S,N,P]-[O-]", "[*-1,*-2]"] + apparent_cation = [Chem.MolFromSmarts(p) for p in apparent_cation] # p for pattern + apparent_anion = [Chem.MolFromSmarts(p) for p in apparent_anion] + + potential_cation = [ + "[$([N;H2&+0][C;!$(C=*)]);!$(N[a])]", + "[$([N;H1&+0]([C;!$(C=*)])[C;!$(C=*)]);!$(N[a])]", + "[$([N;H0&+0]([C;!$(C=*)])([C;!$(C=*)])[C;!$(C=*)]);!$(N[a])]", + "NC(=N)", + "[n;R1]1[c;R1][n;R1][c;R1][c;R1]1", + ] + potential_anion = ["O=[C,S,N,P]-[OH,O-]"] + potential_cation = [Chem.MolFromSmarts(p) for p in potential_cation] + potential_anion = [Chem.MolFromSmarts(p) for p in potential_anion] + + all_cation = apparent_cation + potential_cation + all_anion = apparent_anion + potential_anion + + filter_dictionary = dict( + apparent_cation=apparent_cation, + apparent_anion=apparent_anion, + potential_cation=potential_cation, + potential_anion=potential_anion, + all_cation=all_cation, + all_anion=all_anion, + ) + + combined_match = [] + try: + for pattern in filter_dictionary[mode]: + matched = flatten_atom_list(mol.GetSubstructMatches(pattern)) + combined_match.append(matched) + except KeyError: + raise KeyError(f"Error: filtering mode {mode} is not recognized") + + return flatten_atom_list(combined_match) diff --git a/src/deemian/chem/utility.py b/src/deemian/chem/utility.py index 3b9824f..b4a2815 100644 --- a/src/deemian/chem/utility.py +++ b/src/deemian/chem/utility.py @@ -1,6 +1,12 @@ +from itertools import chain + import pandas as pd +def flatten_atom_list(nested_list_of_atoms): + return list(set(chain.from_iterable(nested_list_of_atoms))) + + def dataframe_to_pdb_block(df: pd.DataFrame) -> str: df_noh = df[df["atom_symbol"] != "H"] # from https://stackoverflow.com/questions/35491274/split-a-pandas-column-of-lists-into-multiple-columns diff --git a/tests/test_chem_interactions.py b/tests/test_chem_interactions.py new file mode 100644 index 0000000..7b6117a --- /dev/null +++ b/tests/test_chem_interactions.py @@ -0,0 +1,62 @@ +import pytest +from rdkit.Chem import AllChem as Chem + +from deemian.chem.interactions import filter_charge + + +@pytest.mark.parametrize( + "smiles, n_cation, n_anion, n_ionizable_cation, n_ionizable_anion, all_cation, all_anion", + [ + ("CC(=O)NCCCS(O)(=O)=O", 0, 0, 0, 4, 0, 4), # acamprosate + ("CC(=O)NCCCS(=O)(=O)[O-]", 0, 4, 0, 4, 0, 4), # acamprosate - + ("CCC(CC)O[C@@H]1C=C(C[C@@H]([C@H]1NC(=O)C)N)C(=O)O", 0, 0, 1, 3, 1, 3), # oseltamivir + ("CCC(CC)O[C@@H]1C=C(C(=O)O)C[C@H]([NH3+])[C@H]1NC(C)=O", 1, 0, 0, 3, 1, 3), # oseltamivir + + ("CCC(CC)O[C@@H]1C=C(C(=O)[O-])C[C@H](N)[C@H]1NC(C)=O", 0, 3, 1, 3, 1, 3), # oseltamivir - + ("CCC(CC)O[C@@H]1C=C(C(=O)[O-])C[C@H]([NH3+])[C@H]1NC(C)=O", 1, 3, 0, 3, 1, 3), # oseltamivir +- + ("NC[C@@H](F)CP(O)=O", 0, 0, 1, 3, 1, 3), # lesogaberan + ("[NH3+]C[C@@H](F)C[PH](=O)O", 1, 0, 0, 3, 1, 3), # lesogaberan + + ("NC[C@@H](F)C[PH](=O)[O-]", 0, 3, 1, 3, 1, 3), # lesogaberan - + ("[NH3+]C[C@@H](F)C[PH](=O)[O-]", 1, 3, 0, 3, 1, 3), # lesogaberan +- + ("CN(C)C(=N)NC(N)=N", 0, 0, 7, 0, 7, 0), # metformin + ("CN(C)C(=[NH2+])N=C(N)N", 3, 0, 7, 0, 7, 0), # metformin + + ("N[C@@H](CCCNC(=N)N[N+]([O-])=O)C(O)=O", 0, 3, 5, 6, 5, 6), # nitroarginine + ("NC(=N[N+](=O)[O-])NCCC[C@H]([NH3+])C(=O)O", 1, 3, 4, 6, 5, 6), # nitroarginine + + ("NC(=N[N+](=O)[O-])NCCC[C@H](N)C(=O)[O-]", 0, 6, 5, 6, 5, 6), # nitroarginine - + ("NC(=N[N+](=O)[O-])NCCC[C@H]([NH3+])C(=O)[O-]", 1, 6, 4, 6, 5, 6), # nitroarginine +- + ( + "N=C(N)NCCC[C@H](NC(=O)[C@@H](N)CCCCN)C(=O)N[C@@H](CC(=O)O)C(=O)N[C@@H](CCC(=O)O)C(=O)N[C@@H](Cc1c[nH]cn1)C(=O)O", # noqa: E501 + 0, + 0, + 11, + 9, + 11, + 9, + ), # pentapeptide KRDEH generated with RDKit + ], +) +def test_filter_charge(smiles, n_cation, n_anion, n_ionizable_cation, n_ionizable_anion, all_cation, all_anion): + """ + test SMILES retrieved from Drugbank, and the charged version generated using SwissParam + with MMFF+Match parameter. Except Oseltamivir SMILES which retrieved from PDB with ID: G39. + """ + mol = Chem.MolFromSmiles(smiles) + n_cation_result = len(filter_charge(mol, "apparent_cation")) + n_anion_result = len(filter_charge(mol, "apparent_anion")) + n_ionizable_cation_result = len(filter_charge(mol, "potential_cation")) + n_ionizable_anion_result = len(filter_charge(mol, "potential_anion")) + all_cation_result = len(filter_charge(mol, "all_cation")) + all_anion_result = len(filter_charge(mol, "all_anion")) + + assert n_cation == n_cation_result + assert n_anion == n_anion_result + assert n_ionizable_cation == n_ionizable_cation_result + assert n_ionizable_anion == n_ionizable_anion_result + assert all_cation == all_cation_result + assert all_anion == all_anion_result + + +def test_filter_charge_keyerror(): + with pytest.raises(KeyError) as e_info: + mol = Chem.MolFromSmiles("CC(=O)NCCCS(O)(=O)=O") + filter_charge(mol, "cation") + assert str(e_info.value) == "'Error: filtering mode cation is not recognized'" diff --git a/tests/test_chem_utility.py b/tests/test_chem_utility.py index c286e7f..9703ba2 100644 --- a/tests/test_chem_utility.py +++ b/tests/test_chem_utility.py @@ -1,7 +1,16 @@ import pandas as pd from rdkit.Chem import AllChem as Chem -from deemian.chem.utility import dataframe_to_pdb_block +from deemian.chem.utility import dataframe_to_pdb_block, flatten_atom_list + + +def test_flatten_atom_list(): + test_list = [[1, 2, 3], [4, 5, 6], [1, 10, 12]] + expected_list = [1, 2, 3, 4, 5, 6, 10, 12] + + result_list = flatten_atom_list(test_list) + + assert result_list == expected_list def test_dataframe_to_pdb_block():