-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #27 from radifar/identify-electrostatic-interactions
Identify electrostatic interactions
- Loading branch information
Showing
25 changed files
with
601 additions
and
460 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
from numpy.linalg import norm as dist | ||
import pandas as pd | ||
from rdkit.Chem import AllChem as Chem | ||
|
||
from deemian.chem.utility import flatten_atom_list | ||
|
||
|
||
def generate_pair_info(query_result, s1_df, s2_df, conformation, interaction_type): | ||
pair_info_df = pd.DataFrame() | ||
for i, s2_list in enumerate(query_result): | ||
s1 = s1_df.iloc[[i]].reset_index() | ||
s2 = s2_df.iloc[s2_list].reset_index() | ||
s2["atom_id_1"] = s1.iloc[0].atom_id | ||
pair = s1.merge(s2, left_on="atom_id", right_on="atom_id_1", suffixes=("_s1", "_s2")).drop( | ||
columns=["atom_id_1"] | ||
) | ||
pair_info_df = pd.concat([pair_info_df, pair]) | ||
|
||
pair_info_df.reset_index(drop=True, inplace=True) | ||
|
||
conf_1 = f"conf_{conformation}_s1" | ||
conf_2 = f"conf_{conformation}_s2" | ||
pair_info_df["distance"] = (pair_info_df[conf_1] - pair_info_df[conf_2]).map(dist) | ||
pair_info_df["conformation"] = conformation | ||
pair_info_df["interaction_type"] = interaction_type | ||
|
||
return pair_info_df | ||
|
||
|
||
def filter_charge(mol: Chem.rdchem.Mol, mode: str) -> list[int]: | ||
apparent_cation = ["[$([*+1,*+2,*+3]);!$([N+]-[O-])]", "[NX3&!$([NX3]-O)]-[C]=[NX3+]"] | ||
apparent_anion = ["O=[C,S,N,P]-[O-]", "[*-1,*-2]"] | ||
apparent_cation = [Chem.MolFromSmarts(p) for p in apparent_cation] # p for pattern | ||
apparent_anion = [Chem.MolFromSmarts(p) for p in apparent_anion] | ||
|
||
potential_cation = [ | ||
"[$([N;H2&+0][C;!$(C=*)]);!$(N[a])]", | ||
"[$([N;H1&+0]([C;!$(C=*)])[C;!$(C=*)]);!$(N[a])]", | ||
"[$([N;H0&+0]([C;!$(C=*)])([C;!$(C=*)])[C;!$(C=*)]);!$(N[a])]", | ||
"NC(=N)", | ||
"[n;R1]1[c;R1][n;R1][c;R1][c;R1]1", | ||
] | ||
potential_anion = ["O=[C,S,N,P]-[OH,O-]"] | ||
potential_cation = [Chem.MolFromSmarts(p) for p in potential_cation] | ||
potential_anion = [Chem.MolFromSmarts(p) for p in potential_anion] | ||
|
||
all_cation = apparent_cation + potential_cation | ||
all_anion = apparent_anion + potential_anion | ||
|
||
filter_dictionary = dict( | ||
apparent_cation=apparent_cation, | ||
apparent_anion=apparent_anion, | ||
potential_cation=potential_cation, | ||
potential_anion=potential_anion, | ||
all_cation=all_cation, | ||
all_anion=all_anion, | ||
) | ||
|
||
combined_match = [] | ||
try: | ||
for pattern in filter_dictionary[mode]: | ||
matched = flatten_atom_list(mol.GetSubstructMatches(pattern)) | ||
combined_match.append(matched) | ||
except KeyError: | ||
raise KeyError(f"Error: filtering mode {mode} is not recognized") | ||
|
||
return flatten_atom_list(combined_match) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,43 +1,63 @@ | ||
from rdkit.Chem import AllChem as Chem | ||
from dataclasses import dataclass | ||
|
||
from deemian.chem.utility import flatten_atom_list | ||
|
||
|
||
def filter_charge(mol: Chem.rdchem.Mol, mode: str) -> list[int]: | ||
apparent_cation = ["[$([*+1,*+2,*+3]);!$([N+]-[O-])]", "[NX3&!$([NX3]-O)]-[C]=[NX3+]"] | ||
apparent_anion = ["O=[C,S,N,P]-[O-]", "[*-1,*-2]"] | ||
apparent_cation = [Chem.MolFromSmarts(p) for p in apparent_cation] # p for pattern | ||
apparent_anion = [Chem.MolFromSmarts(p) for p in apparent_anion] | ||
|
||
potential_cation = [ | ||
"[$([N;H2&+0][C;!$(C=*)]);!$(N[a])]", | ||
"[$([N;H1&+0]([C;!$(C=*)])[C;!$(C=*)]);!$(N[a])]", | ||
"[$([N;H0&+0]([C;!$(C=*)])([C;!$(C=*)])[C;!$(C=*)]);!$(N[a])]", | ||
"NC(=N)", | ||
"[n;R1]1[c;R1][n;R1][c;R1][c;R1]1", | ||
] | ||
potential_anion = ["O=[C,S,N,P]-[OH,O-]"] | ||
potential_cation = [Chem.MolFromSmarts(p) for p in potential_cation] | ||
potential_anion = [Chem.MolFromSmarts(p) for p in potential_anion] | ||
|
||
all_cation = apparent_cation + potential_cation | ||
all_anion = apparent_anion + potential_anion | ||
|
||
filter_dictionary = dict( | ||
apparent_cation=apparent_cation, | ||
apparent_anion=apparent_anion, | ||
potential_cation=potential_cation, | ||
potential_anion=potential_anion, | ||
all_cation=all_cation, | ||
all_anion=all_anion, | ||
) | ||
|
||
combined_match = [] | ||
try: | ||
for pattern in filter_dictionary[mode]: | ||
matched = flatten_atom_list(mol.GetSubstructMatches(pattern)) | ||
combined_match.append(matched) | ||
except KeyError: | ||
raise KeyError(f"Error: filtering mode {mode} is not recognized") | ||
|
||
return flatten_atom_list(combined_match) | ||
import pandas as pd | ||
from rdkit.Chem import AllChem as Chem | ||
from scipy.spatial import KDTree | ||
|
||
from deemian.chem.interaction_utility import filter_charge, generate_pair_info | ||
|
||
|
||
@dataclass | ||
class InteractionData: | ||
subject_1_mol: Chem.rdchem.Mol | ||
subject_2_mol: Chem.rdchem.Mol | ||
subject_1_df: pd.DataFrame | ||
subject_2_df: pd.DataFrame | ||
conformation: list | ||
electrostatic_s1_as_cation: pd.DataFrame = None | ||
electrostatic_s1_as_anion: pd.DataFrame = None | ||
|
||
def calculate_electrostatic(self, positive: bool, negative: bool): | ||
cation_mode = "all_cation" if positive else "apparent_cation" | ||
anion_mode = "all_anion" if negative else "apparent_anion" | ||
|
||
cation_1_ids = filter_charge(self.subject_1_mol, cation_mode) | ||
anion_1_ids = filter_charge(self.subject_1_mol, anion_mode) | ||
cation_2_ids = filter_charge(self.subject_2_mol, cation_mode) | ||
anion_2_ids = filter_charge(self.subject_2_mol, anion_mode) | ||
|
||
df1 = self.subject_1_df | ||
df2 = self.subject_2_df | ||
exclude_atom = ["C", "N", "S", "P"] | ||
cation_1_df = df1[df1.index.isin(cation_1_ids)] | ||
anion_1_df = df1[df1.index.isin(anion_1_ids) & ~df1["atom_symbol"].isin(exclude_atom)] | ||
cation_2_df = df2[df2.index.isin(cation_2_ids)] | ||
anion_2_df = df2[df2.index.isin(anion_2_ids) & ~df2["atom_symbol"].isin(exclude_atom)] | ||
|
||
for conf_num in self.conformation: | ||
conformation_column = "conf_" + str(conf_num) | ||
|
||
if (not cation_1_df.empty) and (not anion_2_df.empty): | ||
print(cation_1_df.empty, anion_2_df.empty) | ||
cation_1_tree = KDTree(cation_1_df[conformation_column].to_list()) | ||
anion_2_tree = KDTree(anion_2_df[conformation_column].to_list()) | ||
|
||
s1_as_cation = cation_1_tree.query_ball_tree(anion_2_tree, 4.5) | ||
|
||
self.electrostatic_s1_as_cation = generate_pair_info( | ||
s1_as_cation, cation_1_df, anion_2_df, conf_num, "electrostatic_cation" | ||
) | ||
else: | ||
self.electrostatic_s1_as_cation = pd.DataFrame() | ||
|
||
if (not cation_2_df.empty) and (not anion_1_df.empty): | ||
cation_2_tree = KDTree(cation_2_df[conformation_column].to_list()) | ||
anion_1_tree = KDTree(anion_1_df[conformation_column].to_list()) | ||
|
||
s1_as_anion = anion_1_tree.query_ball_tree(cation_2_tree, 4.5) | ||
|
||
self.electrostatic_s1_as_anion = generate_pair_info( | ||
s1_as_anion, anion_1_df, cation_2_df, conf_num, "electrostatic_anion" | ||
) | ||
else: | ||
self.electrostatic_s1_as_anion = pd.DataFrame() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,77 +1,106 @@ | ||
from collections import defaultdict | ||
from dataclasses import dataclass, field | ||
|
||
import pandas as pd | ||
from rdkit.Chem import AllChem as Chem | ||
|
||
from deemian.chem.interactions import InteractionData | ||
from deemian.chem.reader import mol_to_dataframe | ||
from deemian.chem.selection import mol_dataframe_selection | ||
from deemian.chem.utility import dataframe_to_pdb_block | ||
|
||
|
||
@dataclass | ||
class DeemianData: | ||
molecule_dataframe: dict = field(default_factory=lambda: {}) | ||
molecule_pdb_block: dict = field(default_factory=lambda: {}) | ||
selections: dict = field(default_factory=lambda: {}) | ||
class Molecule: | ||
rdkit_mol: Chem.rdchem.Mol | ||
mol_dataframe: pd.DataFrame = None | ||
|
||
|
||
@dataclass | ||
class Selection: | ||
mol_parent: str | ||
mol_dataframe: pd.DataFrame | ||
mol_pdb_block: str = None | ||
|
||
|
||
@dataclass | ||
class Measurement: | ||
interactions: list = field(default_factory=lambda: []) | ||
ionizable: dict = field(default_factory=lambda: {"positive": False, "negative": False}) | ||
interacting_subjects: dict = field(default_factory=lambda: {}) | ||
conformation: list = field(default_factory=lambda: []) | ||
interaction_details: dict = field(default_factory=lambda: {}) | ||
readable_output: dict = field(default_factory=lambda: {}) | ||
|
||
def conformation_range(self, start, end): | ||
self.conformation = list(range(int(start), int(end) + 1)) | ||
|
||
class DeemianDataBuilder: | ||
def __init__(self, deemian_data: DeemianData) -> None: | ||
self.deemian_data = deemian_data | ||
def set_ionizable(self, charge: str, boolean: str): | ||
if boolean == "true": | ||
boolean = True | ||
elif boolean == "false": | ||
boolean = False | ||
self.ionizable[charge] = boolean | ||
|
||
def read_molecule(self, mol_filename: str): | ||
mol = Chem.MolFromPDBFile(mol_filename, removeHs=False) | ||
|
||
@dataclass | ||
class DeemianData: | ||
molecule: dict[str, Molecule] = field(default_factory=lambda: {}) | ||
selection: dict[str, Selection] = field(default_factory=lambda: {}) | ||
measurement: dict[str, Measurement] = field(default_factory=lambda: defaultdict(Measurement)) | ||
interaction_details: dict = field(default_factory=lambda: {}) | ||
readable_output: dict = field(default_factory=lambda: {}) | ||
|
||
def add_molecule(self, name): | ||
mol = Chem.MolFromPDBFile(name, removeHs=False) | ||
mol_df = mol_to_dataframe(mol) | ||
self.deemian_data.molecule_dataframe[mol_filename] = mol_df | ||
self.molecule[name] = Molecule(mol, mol_df) | ||
|
||
def add_selection(self, name, selection, mol_parent): | ||
parent_df = self.molecule[mol_parent].mol_dataframe | ||
selection_df = mol_dataframe_selection(selection, parent_df) | ||
|
||
def assign_selection(self, name: str, selection: list[tuple], mol_filename: str): | ||
mol_df = self.deemian_data.molecule_dataframe[mol_filename] | ||
self.deemian_data.molecule_dataframe[name] = mol_dataframe_selection(selection, mol_df) | ||
selection.insert(0, mol_filename) | ||
self.deemian_data.selections[name] = selection | ||
self.selection[name] = Selection(mol_parent, selection_df) | ||
|
||
def correct_bond(self, name: str, template: str): | ||
mol_df = self.deemian_data.molecule_dataframe[name] | ||
mol_pdb_block = dataframe_to_pdb_block(mol_df) | ||
self.deemian_data.molecule_pdb_block[name] = mol_pdb_block | ||
def correct_bond(self, name, template): | ||
selection_df = self.selection[name].mol_dataframe | ||
selection_pdb_block = dataframe_to_pdb_block(selection_df) | ||
selection_mol = Chem.MolFromPDBBlock(selection_pdb_block) | ||
|
||
mol = Chem.MolFromPDBBlock(mol_pdb_block) | ||
template_mol = Chem.MolFromSmiles(template) | ||
mol = Chem.AssignBondOrdersFromTemplate(template_mol, mol) | ||
self.deemian_data.molecule_dataframe[name] = mol_to_dataframe(mol) | ||
corrected_mol = Chem.AssignBondOrdersFromTemplate(template_mol, selection_mol) | ||
corrected_df = mol_to_dataframe(corrected_mol) | ||
|
||
def set_interactions(self, interactions: list[str]): | ||
self.deemian_data.interactions = interactions | ||
self.molecule[name] = Molecule(corrected_mol) | ||
self.selection[name] = Selection(name, corrected_df, selection_pdb_block) | ||
|
||
def set_ionizable(self, charge: str, boolean: str): | ||
if boolean == "true": | ||
boolean = True | ||
elif boolean == "false": | ||
boolean = False | ||
self.deemian_data.ionizable[charge] = boolean | ||
def add_measurement(self, id): | ||
return self.measurement[id] | ||
|
||
def calculate_interactions(self, id): | ||
measurement = self.measurement[id] | ||
|
||
def set_interacting_subjects(self, subject_1: str, subject_2: str, name: str): | ||
self.deemian_data.interacting_subjects[name] = (subject_1, subject_2) | ||
for pair in measurement.interacting_subjects: | ||
subject_1, subject_2 = measurement.interacting_subjects[pair] | ||
subject_1 = self.selection[subject_1] | ||
subject_2 = self.selection[subject_2] | ||
subject_1_mol = self.molecule[subject_1.mol_parent] | ||
subject_2_mol = self.molecule[subject_2.mol_parent] | ||
subject_1_df = subject_1.mol_dataframe | ||
subject_2_df = subject_2.mol_dataframe | ||
conformation = measurement.conformation | ||
if not any(conformation): | ||
conformation = [1] | ||
|
||
def set_conformation(self, number: str): | ||
self.deemian_data.conformation = [int(number)] | ||
interaction_data = InteractionData(subject_1_mol, subject_2_mol, subject_1_df, subject_2_df, conformation) | ||
|
||
def set_conformation_range(self, start: str, end: str): | ||
self.deemian_data.conformation = list(range(int(start), int(end) + 1)) | ||
interaction_type = measurement.interactions | ||
|
||
def calculate_interactions(self, measurement_identifier: str): | ||
return measurement_identifier | ||
if ("electrostatic" in interaction_type) or ("all" in interaction_type): | ||
interaction_data.calculate_electrostatic(**measurement.ionizable) | ||
|
||
def write_readable_output(self, out_file: str, presentation_identifier: str): | ||
return (out_file, presentation_identifier) | ||
self.interaction_details[pair] = interaction_data | ||
|
||
def write_deemian_data(self, out_file: str, presentation_identifier: str): | ||
return (out_file, presentation_identifier) | ||
def write_readable_output(self, out_file: str, presentation_id: str): | ||
return (out_file, presentation_id) | ||
|
||
def generate_deemian_data(self): | ||
return self.deemian_data | ||
def write_deemian_data(self, out_file: str, presentation_id: str): | ||
return (out_file, presentation_id) |
Oops, something went wrong.