Skip to content

Commit

Permalink
Merge pull request #27 from radifar/identify-electrostatic-interactions
Browse files Browse the repository at this point in the history
Identify electrostatic interactions
  • Loading branch information
radifar authored Dec 24, 2023
2 parents 869d6a1 + 644ecab commit e280e1b
Show file tree
Hide file tree
Showing 25 changed files with 601 additions and 460 deletions.
192 changes: 43 additions & 149 deletions poetry.lock

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,11 @@ readme = "README.md"

[tool.poetry.dependencies]
python = "^3.10"
pydantic = "^2.4.2"
lark = "^1.1.7"
click = "^8.1.7"
rdkit = "^2023.3.3"
pandas = "^2.1.4"
pyarrow = "^14.0.1"
scipy = "^1.11.4"

[tool.poetry.group.dev.dependencies]
rich = "^13.6.0"
Expand Down
67 changes: 67 additions & 0 deletions src/deemian/chem/interaction_utility.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from numpy.linalg import norm as dist
import pandas as pd
from rdkit.Chem import AllChem as Chem

from deemian.chem.utility import flatten_atom_list


def generate_pair_info(query_result, s1_df, s2_df, conformation, interaction_type):
pair_info_df = pd.DataFrame()
for i, s2_list in enumerate(query_result):
s1 = s1_df.iloc[[i]].reset_index()
s2 = s2_df.iloc[s2_list].reset_index()
s2["atom_id_1"] = s1.iloc[0].atom_id
pair = s1.merge(s2, left_on="atom_id", right_on="atom_id_1", suffixes=("_s1", "_s2")).drop(
columns=["atom_id_1"]
)
pair_info_df = pd.concat([pair_info_df, pair])

pair_info_df.reset_index(drop=True, inplace=True)

conf_1 = f"conf_{conformation}_s1"
conf_2 = f"conf_{conformation}_s2"
pair_info_df["distance"] = (pair_info_df[conf_1] - pair_info_df[conf_2]).map(dist)
pair_info_df["conformation"] = conformation
pair_info_df["interaction_type"] = interaction_type

return pair_info_df


def filter_charge(mol: Chem.rdchem.Mol, mode: str) -> list[int]:
apparent_cation = ["[$([*+1,*+2,*+3]);!$([N+]-[O-])]", "[NX3&!$([NX3]-O)]-[C]=[NX3+]"]
apparent_anion = ["O=[C,S,N,P]-[O-]", "[*-1,*-2]"]
apparent_cation = [Chem.MolFromSmarts(p) for p in apparent_cation] # p for pattern
apparent_anion = [Chem.MolFromSmarts(p) for p in apparent_anion]

potential_cation = [
"[$([N;H2&+0][C;!$(C=*)]);!$(N[a])]",
"[$([N;H1&+0]([C;!$(C=*)])[C;!$(C=*)]);!$(N[a])]",
"[$([N;H0&+0]([C;!$(C=*)])([C;!$(C=*)])[C;!$(C=*)]);!$(N[a])]",
"NC(=N)",
"[n;R1]1[c;R1][n;R1][c;R1][c;R1]1",
]
potential_anion = ["O=[C,S,N,P]-[OH,O-]"]
potential_cation = [Chem.MolFromSmarts(p) for p in potential_cation]
potential_anion = [Chem.MolFromSmarts(p) for p in potential_anion]

all_cation = apparent_cation + potential_cation
all_anion = apparent_anion + potential_anion

filter_dictionary = dict(
apparent_cation=apparent_cation,
apparent_anion=apparent_anion,
potential_cation=potential_cation,
potential_anion=potential_anion,
all_cation=all_cation,
all_anion=all_anion,
)

combined_match = []
try:
for pattern in filter_dictionary[mode]:
matched = flatten_atom_list(mol.GetSubstructMatches(pattern))
combined_match.append(matched)
except KeyError:
raise KeyError(f"Error: filtering mode {mode} is not recognized")

return flatten_atom_list(combined_match)
104 changes: 62 additions & 42 deletions src/deemian/chem/interactions.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,63 @@
from rdkit.Chem import AllChem as Chem
from dataclasses import dataclass

from deemian.chem.utility import flatten_atom_list


def filter_charge(mol: Chem.rdchem.Mol, mode: str) -> list[int]:
apparent_cation = ["[$([*+1,*+2,*+3]);!$([N+]-[O-])]", "[NX3&!$([NX3]-O)]-[C]=[NX3+]"]
apparent_anion = ["O=[C,S,N,P]-[O-]", "[*-1,*-2]"]
apparent_cation = [Chem.MolFromSmarts(p) for p in apparent_cation] # p for pattern
apparent_anion = [Chem.MolFromSmarts(p) for p in apparent_anion]

potential_cation = [
"[$([N;H2&+0][C;!$(C=*)]);!$(N[a])]",
"[$([N;H1&+0]([C;!$(C=*)])[C;!$(C=*)]);!$(N[a])]",
"[$([N;H0&+0]([C;!$(C=*)])([C;!$(C=*)])[C;!$(C=*)]);!$(N[a])]",
"NC(=N)",
"[n;R1]1[c;R1][n;R1][c;R1][c;R1]1",
]
potential_anion = ["O=[C,S,N,P]-[OH,O-]"]
potential_cation = [Chem.MolFromSmarts(p) for p in potential_cation]
potential_anion = [Chem.MolFromSmarts(p) for p in potential_anion]

all_cation = apparent_cation + potential_cation
all_anion = apparent_anion + potential_anion

filter_dictionary = dict(
apparent_cation=apparent_cation,
apparent_anion=apparent_anion,
potential_cation=potential_cation,
potential_anion=potential_anion,
all_cation=all_cation,
all_anion=all_anion,
)

combined_match = []
try:
for pattern in filter_dictionary[mode]:
matched = flatten_atom_list(mol.GetSubstructMatches(pattern))
combined_match.append(matched)
except KeyError:
raise KeyError(f"Error: filtering mode {mode} is not recognized")

return flatten_atom_list(combined_match)
import pandas as pd
from rdkit.Chem import AllChem as Chem
from scipy.spatial import KDTree

from deemian.chem.interaction_utility import filter_charge, generate_pair_info


@dataclass
class InteractionData:
subject_1_mol: Chem.rdchem.Mol
subject_2_mol: Chem.rdchem.Mol
subject_1_df: pd.DataFrame
subject_2_df: pd.DataFrame
conformation: list
electrostatic_s1_as_cation: pd.DataFrame = None
electrostatic_s1_as_anion: pd.DataFrame = None

def calculate_electrostatic(self, positive: bool, negative: bool):
cation_mode = "all_cation" if positive else "apparent_cation"
anion_mode = "all_anion" if negative else "apparent_anion"

cation_1_ids = filter_charge(self.subject_1_mol, cation_mode)
anion_1_ids = filter_charge(self.subject_1_mol, anion_mode)
cation_2_ids = filter_charge(self.subject_2_mol, cation_mode)
anion_2_ids = filter_charge(self.subject_2_mol, anion_mode)

df1 = self.subject_1_df
df2 = self.subject_2_df
exclude_atom = ["C", "N", "S", "P"]
cation_1_df = df1[df1.index.isin(cation_1_ids)]
anion_1_df = df1[df1.index.isin(anion_1_ids) & ~df1["atom_symbol"].isin(exclude_atom)]
cation_2_df = df2[df2.index.isin(cation_2_ids)]
anion_2_df = df2[df2.index.isin(anion_2_ids) & ~df2["atom_symbol"].isin(exclude_atom)]

for conf_num in self.conformation:
conformation_column = "conf_" + str(conf_num)

if (not cation_1_df.empty) and (not anion_2_df.empty):
print(cation_1_df.empty, anion_2_df.empty)
cation_1_tree = KDTree(cation_1_df[conformation_column].to_list())
anion_2_tree = KDTree(anion_2_df[conformation_column].to_list())

s1_as_cation = cation_1_tree.query_ball_tree(anion_2_tree, 4.5)

self.electrostatic_s1_as_cation = generate_pair_info(
s1_as_cation, cation_1_df, anion_2_df, conf_num, "electrostatic_cation"
)
else:
self.electrostatic_s1_as_cation = pd.DataFrame()

if (not cation_2_df.empty) and (not anion_1_df.empty):
cation_2_tree = KDTree(cation_2_df[conformation_column].to_list())
anion_1_tree = KDTree(anion_1_df[conformation_column].to_list())

s1_as_anion = anion_1_tree.query_ball_tree(cation_2_tree, 4.5)

self.electrostatic_s1_as_anion = generate_pair_info(
s1_as_anion, anion_1_df, cation_2_df, conf_num, "electrostatic_anion"
)
else:
self.electrostatic_s1_as_anion = pd.DataFrame()
2 changes: 1 addition & 1 deletion src/deemian/chem/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def mol_to_dataframe(mol):
# note: using pyarrow for list of float making it slower for 'to_list' conversion,
# the scipy's KDTree require list of list as the input.
for index, conformer in enumerate(mol.GetConformers()):
coordinate_number = "conf_" + str(index)
coordinate_number = "conf_" + str(index + 1)
mol_record[coordinate_number] = list(conformer.GetPositions())

return pd.DataFrame(mol_record).set_index("atom_id")
2 changes: 1 addition & 1 deletion src/deemian/chem/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def dataframe_to_pdb_block(df: pd.DataFrame) -> str:
df_noh = df[df["atom_symbol"] != "H"]
# from https://stackoverflow.com/questions/35491274/split-a-pandas-column-of-lists-into-multiple-columns

df_pdb = pd.DataFrame(df_noh["conf_0"].to_list(), columns=["x", "y", "z"])
df_pdb = pd.DataFrame(df_noh["conf_1"].to_list(), columns=["x", "y", "z"])
df_pdb.index = df_noh.index
df_pdb = df_pdb.join(df_noh).reset_index()
df_pdb["record_type"] = "HETATM"
Expand Down
121 changes: 75 additions & 46 deletions src/deemian/engine/builder.py
Original file line number Diff line number Diff line change
@@ -1,77 +1,106 @@
from collections import defaultdict
from dataclasses import dataclass, field

import pandas as pd
from rdkit.Chem import AllChem as Chem

from deemian.chem.interactions import InteractionData
from deemian.chem.reader import mol_to_dataframe
from deemian.chem.selection import mol_dataframe_selection
from deemian.chem.utility import dataframe_to_pdb_block


@dataclass
class DeemianData:
molecule_dataframe: dict = field(default_factory=lambda: {})
molecule_pdb_block: dict = field(default_factory=lambda: {})
selections: dict = field(default_factory=lambda: {})
class Molecule:
rdkit_mol: Chem.rdchem.Mol
mol_dataframe: pd.DataFrame = None


@dataclass
class Selection:
mol_parent: str
mol_dataframe: pd.DataFrame
mol_pdb_block: str = None


@dataclass
class Measurement:
interactions: list = field(default_factory=lambda: [])
ionizable: dict = field(default_factory=lambda: {"positive": False, "negative": False})
interacting_subjects: dict = field(default_factory=lambda: {})
conformation: list = field(default_factory=lambda: [])
interaction_details: dict = field(default_factory=lambda: {})
readable_output: dict = field(default_factory=lambda: {})

def conformation_range(self, start, end):
self.conformation = list(range(int(start), int(end) + 1))

class DeemianDataBuilder:
def __init__(self, deemian_data: DeemianData) -> None:
self.deemian_data = deemian_data
def set_ionizable(self, charge: str, boolean: str):
if boolean == "true":
boolean = True
elif boolean == "false":
boolean = False
self.ionizable[charge] = boolean

def read_molecule(self, mol_filename: str):
mol = Chem.MolFromPDBFile(mol_filename, removeHs=False)

@dataclass
class DeemianData:
molecule: dict[str, Molecule] = field(default_factory=lambda: {})
selection: dict[str, Selection] = field(default_factory=lambda: {})
measurement: dict[str, Measurement] = field(default_factory=lambda: defaultdict(Measurement))
interaction_details: dict = field(default_factory=lambda: {})
readable_output: dict = field(default_factory=lambda: {})

def add_molecule(self, name):
mol = Chem.MolFromPDBFile(name, removeHs=False)
mol_df = mol_to_dataframe(mol)
self.deemian_data.molecule_dataframe[mol_filename] = mol_df
self.molecule[name] = Molecule(mol, mol_df)

def add_selection(self, name, selection, mol_parent):
parent_df = self.molecule[mol_parent].mol_dataframe
selection_df = mol_dataframe_selection(selection, parent_df)

def assign_selection(self, name: str, selection: list[tuple], mol_filename: str):
mol_df = self.deemian_data.molecule_dataframe[mol_filename]
self.deemian_data.molecule_dataframe[name] = mol_dataframe_selection(selection, mol_df)
selection.insert(0, mol_filename)
self.deemian_data.selections[name] = selection
self.selection[name] = Selection(mol_parent, selection_df)

def correct_bond(self, name: str, template: str):
mol_df = self.deemian_data.molecule_dataframe[name]
mol_pdb_block = dataframe_to_pdb_block(mol_df)
self.deemian_data.molecule_pdb_block[name] = mol_pdb_block
def correct_bond(self, name, template):
selection_df = self.selection[name].mol_dataframe
selection_pdb_block = dataframe_to_pdb_block(selection_df)
selection_mol = Chem.MolFromPDBBlock(selection_pdb_block)

mol = Chem.MolFromPDBBlock(mol_pdb_block)
template_mol = Chem.MolFromSmiles(template)
mol = Chem.AssignBondOrdersFromTemplate(template_mol, mol)
self.deemian_data.molecule_dataframe[name] = mol_to_dataframe(mol)
corrected_mol = Chem.AssignBondOrdersFromTemplate(template_mol, selection_mol)
corrected_df = mol_to_dataframe(corrected_mol)

def set_interactions(self, interactions: list[str]):
self.deemian_data.interactions = interactions
self.molecule[name] = Molecule(corrected_mol)
self.selection[name] = Selection(name, corrected_df, selection_pdb_block)

def set_ionizable(self, charge: str, boolean: str):
if boolean == "true":
boolean = True
elif boolean == "false":
boolean = False
self.deemian_data.ionizable[charge] = boolean
def add_measurement(self, id):
return self.measurement[id]

def calculate_interactions(self, id):
measurement = self.measurement[id]

def set_interacting_subjects(self, subject_1: str, subject_2: str, name: str):
self.deemian_data.interacting_subjects[name] = (subject_1, subject_2)
for pair in measurement.interacting_subjects:
subject_1, subject_2 = measurement.interacting_subjects[pair]
subject_1 = self.selection[subject_1]
subject_2 = self.selection[subject_2]
subject_1_mol = self.molecule[subject_1.mol_parent]
subject_2_mol = self.molecule[subject_2.mol_parent]
subject_1_df = subject_1.mol_dataframe
subject_2_df = subject_2.mol_dataframe
conformation = measurement.conformation
if not any(conformation):
conformation = [1]

def set_conformation(self, number: str):
self.deemian_data.conformation = [int(number)]
interaction_data = InteractionData(subject_1_mol, subject_2_mol, subject_1_df, subject_2_df, conformation)

def set_conformation_range(self, start: str, end: str):
self.deemian_data.conformation = list(range(int(start), int(end) + 1))
interaction_type = measurement.interactions

def calculate_interactions(self, measurement_identifier: str):
return measurement_identifier
if ("electrostatic" in interaction_type) or ("all" in interaction_type):
interaction_data.calculate_electrostatic(**measurement.ionizable)

def write_readable_output(self, out_file: str, presentation_identifier: str):
return (out_file, presentation_identifier)
self.interaction_details[pair] = interaction_data

def write_deemian_data(self, out_file: str, presentation_identifier: str):
return (out_file, presentation_identifier)
def write_readable_output(self, out_file: str, presentation_id: str):
return (out_file, presentation_id)

def generate_deemian_data(self):
return self.deemian_data
def write_deemian_data(self, out_file: str, presentation_id: str):
return (out_file, presentation_id)
Loading

0 comments on commit e280e1b

Please sign in to comment.