Skip to content

Commit

Permalink
Merge pull request #28 from radifar/write-readable-output
Browse files (browse the repository at this point in the history)
Write readable output
  • Loading branch information
radifar authored Jan 2, 2024
2 parents e280e1b + 589c937 commit 8dedeba
Show file tree
Hide file tree
Showing 30 changed files with 717 additions and 78 deletions.
88 changes: 87 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ rdkit = "^2023.3.3"
pandas = "^2.1.4"
pyarrow = "^14.0.1"
scipy = "^1.11.4"
jinja2 = "^3.1.2"

[tool.poetry.group.dev.dependencies]
rich = "^13.6.0"
Expand Down
Empty file added src/deemian/chem/__init__.py
Empty file.
16 changes: 11 additions & 5 deletions src/deemian/chem/interaction_utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,17 @@

def generate_pair_info(query_result, s1_df, s2_df, conformation, interaction_type):
pair_info_df = pd.DataFrame()

conf = f"conf_{conformation}"
for i, s2_list in enumerate(query_result):
s1 = s1_df.iloc[[i]].reset_index()
s2 = s2_df.iloc[s2_list].reset_index()
columns = ["atom_id", "chain_id", "atom_symbol", "atom_name", "residue_number", "residue_name", conf]

s1 = s1_df.iloc[[i]].reset_index()[columns]
s1.rename(columns={conf: "coord"}, inplace=True)

s2 = s2_df.iloc[s2_list].reset_index()[columns]
s2.rename(columns={conf: "coord"}, inplace=True)

s2["atom_id_1"] = s1.iloc[0].atom_id
pair = s1.merge(s2, left_on="atom_id", right_on="atom_id_1", suffixes=("_s1", "_s2")).drop(
columns=["atom_id_1"]
Expand All @@ -18,9 +26,7 @@ def generate_pair_info(query_result, s1_df, s2_df, conformation, interaction_typ

pair_info_df.reset_index(drop=True, inplace=True)

conf_1 = f"conf_{conformation}_s1"
conf_2 = f"conf_{conformation}_s2"
pair_info_df["distance"] = (pair_info_df[conf_1] - pair_info_df[conf_2]).map(dist)
pair_info_df["distance"] = (pair_info_df["coord_s1"] - pair_info_df["coord_s2"]).map(dist)
pair_info_df["conformation"] = conformation
pair_info_df["interaction_type"] = interaction_type

Expand Down
20 changes: 10 additions & 10 deletions src/deemian/chem/interactions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from dataclasses import dataclass
from dataclasses import dataclass, field

import pandas as pd
from rdkit.Chem import AllChem as Chem
Expand All @@ -14,8 +14,7 @@ class InteractionData:
subject_1_df: pd.DataFrame
subject_2_df: pd.DataFrame
conformation: list
electrostatic_s1_as_cation: pd.DataFrame = None
electrostatic_s1_as_anion: pd.DataFrame = None
dataframe: pd.DataFrame = field(default_factory=lambda: pd.DataFrame())

def calculate_electrostatic(self, positive: bool, negative: bool):
cation_mode = "all_cation" if positive else "apparent_cation"
Expand All @@ -38,26 +37,27 @@ def calculate_electrostatic(self, positive: bool, negative: bool):
conformation_column = "conf_" + str(conf_num)

if (not cation_1_df.empty) and (not anion_2_df.empty):
print(cation_1_df.empty, anion_2_df.empty)
cation_1_tree = KDTree(cation_1_df[conformation_column].to_list())
anion_2_tree = KDTree(anion_2_df[conformation_column].to_list())

s1_as_cation = cation_1_tree.query_ball_tree(anion_2_tree, 4.5)

self.electrostatic_s1_as_cation = generate_pair_info(
pair_info_df = generate_pair_info(
s1_as_cation, cation_1_df, anion_2_df, conf_num, "electrostatic_cation"
)
else:
self.electrostatic_s1_as_cation = pd.DataFrame()

if not pair_info_df.empty:
self.dataframe = pd.concat([self.dataframe, pair_info_df])

if (not cation_2_df.empty) and (not anion_1_df.empty):
cation_2_tree = KDTree(cation_2_df[conformation_column].to_list())
anion_1_tree = KDTree(anion_1_df[conformation_column].to_list())

s1_as_anion = anion_1_tree.query_ball_tree(cation_2_tree, 4.5)

self.electrostatic_s1_as_anion = generate_pair_info(
pair_info_df = generate_pair_info(
s1_as_anion, anion_1_df, cation_2_df, conf_num, "electrostatic_anion"
)
else:
self.electrostatic_s1_as_anion = pd.DataFrame()

if not pair_info_df.empty:
self.dataframe = pd.concat([self.dataframe, pair_info_df])
16 changes: 10 additions & 6 deletions src/deemian/engine/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from deemian.chem.reader import mol_to_dataframe
from deemian.chem.selection import mol_dataframe_selection
from deemian.chem.utility import dataframe_to_pdb_block
from deemian.writer.readable import generate_report, write_readable


@dataclass
Expand All @@ -29,6 +30,7 @@ class Measurement:
ionizable: dict = field(default_factory=lambda: {"positive": False, "negative": False})
interacting_subjects: dict = field(default_factory=lambda: {})
conformation: list = field(default_factory=lambda: [])
calculation_results: dict = field(default_factory=lambda: {})

def conformation_range(self, start, end):
self.conformation = list(range(int(start), int(end) + 1))
Expand All @@ -46,7 +48,6 @@ class DeemianData:
molecule: dict[str, Molecule] = field(default_factory=lambda: {})
selection: dict[str, Selection] = field(default_factory=lambda: {})
measurement: dict[str, Measurement] = field(default_factory=lambda: defaultdict(Measurement))
interaction_details: dict = field(default_factory=lambda: {})
readable_output: dict = field(default_factory=lambda: {})

def add_molecule(self, name):
Expand Down Expand Up @@ -82,8 +83,8 @@ def calculate_interactions(self, id):
subject_1, subject_2 = measurement.interacting_subjects[pair]
subject_1 = self.selection[subject_1]
subject_2 = self.selection[subject_2]
subject_1_mol = self.molecule[subject_1.mol_parent]
subject_2_mol = self.molecule[subject_2.mol_parent]
subject_1_mol = self.molecule[subject_1.mol_parent].rdkit_mol
subject_2_mol = self.molecule[subject_2.mol_parent].rdkit_mol
subject_1_df = subject_1.mol_dataframe
subject_2_df = subject_2.mol_dataframe
conformation = measurement.conformation
Expand All @@ -97,10 +98,13 @@ def calculate_interactions(self, id):
if ("electrostatic" in interaction_type) or ("all" in interaction_type):
interaction_data.calculate_electrostatic(**measurement.ionizable)

self.interaction_details[pair] = interaction_data
measurement.calculation_results[pair] = interaction_data

def write_readable_output(self, out_file: str, presentation_id: str):
return (out_file, presentation_id)
def write_readable_output(self, presentation_id: str, out_file: str, form):
measurement = self.measurement[presentation_id]

report = generate_report(measurement, form)
write_readable(report, out_file)

def write_deemian_data(self, out_file: str, presentation_id: str):
return (out_file, presentation_id)
4 changes: 2 additions & 2 deletions src/deemian/engine/director.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def director(steps: Tree, data: DeemianData):
presentation_id = instructions.pop(0).value

for inst in instructions:
if inst.type == "readable_output":
data.write_readable_output(inst.out_file, presentation_id)
if inst.type == "interaction_output":
data.write_readable_output(presentation_id, inst.out_file, inst.format)

elif inst.type == "deemian_data":
data.write_deemian_data(inst.out_file, presentation_id)
13 changes: 10 additions & 3 deletions src/deemian/engine/grammar.lark
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ measurement_instruction: "interactions" INTERACTION("," INTERACTION)*
| "conformation" INTEGER("," INTEGER)* -> conformation
| "conformation" INTEGER "to" INTEGER -> conformation_range

presentation_instruction: RESULTS("," RESULTS)* FILE_NAME -> readable_output
| "deemiandata" FILE_NAME -> deemian_data
presentation_instruction: "interactions" INTERACTION_FORMAT? FILE_NAME -> interaction_output
| "IPA" FILE_NAME -> ipa_output
| "bitstring" FILE_NAME -> bitstring_output
| "deemiandata" FILE_NAME -> deemian_data

combine_selection: LOGIC~1..2 (keyvalue | MACRO)
selection: keyvalue | MACRO
Expand All @@ -32,8 +34,13 @@ LOGIC: ("AND"|"and"|"OR"|"or"|"NOT"|"not")
BOOLEAN: ("TRUE"|"true"|"FALSE"|"false")
INTERACTION: ("nonpolar"|"electrostatic"|"hydrogen_bond"|"pi"|"all")
PREPOSITION: ("ON"|"on"|"IN"|"in"|"BETWEEN"|"between"|"AND"|"and")
RESULTS: ("interactions"|"IPA"|"bitstring")
CHARGE: ("positive"|"negative")
INTERACTION_FORMAT.1: ("detailed_conf_first"
| "detailed_type_first"
| "clustered_conf_first"
| "clustered_type_first"
| "summarized_conf_first"
| "summarized_type_first")
FILE_NAME: /[0-9a-zA-Z_.\-]+/
IDENTIFIER: /[0-9a-zA-Z_.\-]+/
INTEGER: /[0-9]+/
Expand Down
Loading

0 comments on commit 8dedeba

Please sign in to comment.