Skip to content

Commit

Permalink
Merge pull request #19 from volkamerlab/update-black-formatting
Browse files Browse the repository at this point in the history
Update black formatting (23.3.0) & pandas API (2.0.0)
  • Loading branch information
dominiquesydow authored Apr 10, 2023
2 parents 1e53ef1 + 5d0b9fb commit 197d3f4
Show file tree
Hide file tree
Showing 6 changed files with 8 additions and 58 deletions.
4 changes: 0 additions & 4 deletions ratar/auxiliary.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ class MoleculeLoader:
"""

def __init__(self, molecule_path, remove_solvent=False):

self.molecule_path = Path(molecule_path)
self.remove_solvent = remove_solvent
self.molecules = self._load_molecule()
Expand Down Expand Up @@ -127,7 +126,6 @@ def _load_mol2(self, remove_solvent=False):
for mol2 in split_multimol2(
str(self.molecule_path)
): # biopandas not compatible with pathlib

# Mol2 files can have 9 or 10 columns.
try: # Try 9 columns.
molecule = PandasMol2().read_mol2_from_list(
Expand Down Expand Up @@ -311,7 +309,6 @@ class AminoAcidDescriptors:
"""

def __init__(self):

zscales_path = ratar_path / "data" / "zscales.csv"
self.zscales = pd.read_csv(zscales_path, index_col="aa3")

Expand Down Expand Up @@ -416,7 +413,6 @@ def _preprocess_pseudocenters():
id_suffix = 1

for index, row in pc_df.iterrows():

# Create prefix of pseudocenter ID
id_prefix_new = f'{row["residue"]}_{row["pc_type"]}'

Expand Down
34 changes: 8 additions & 26 deletions ratar/encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ class BindingSite:
"""

def __init__(self):

self.molecule = None
self.representatives = None
self.shapes = None
Expand Down Expand Up @@ -306,7 +305,6 @@ class Representatives:
"""

def __init__(self):

self.molecule_id = ""
self.data = {
"ca": pd.DataFrame(),
Expand Down Expand Up @@ -446,11 +444,9 @@ def _get_pca(molecule_df):
molecule_pca = []

for index, row in molecule_df_copy.iterrows():

query = f'{row["res_name"]}_{row["atom_name"]}'

if query in list(pseudocenter_atoms["pc_atom_pattern"]): # Non-peptide bond atoms

pc_ix = pseudocenter_atoms.index[
pseudocenter_atoms["pc_atom_pattern"] == query
].tolist()[0]
Expand All @@ -465,15 +461,12 @@ def _get_pca(molecule_df):
)

elif row["atom_name"] == "O": # Peptide bond atoms

molecule_pca.append([index, "HBA", "PEP_HBA_1", "PEP_HBA_1_0"])

elif row["atom_name"] == "N": # Peptide bond atoms

molecule_pca.append([index, "HBD", "PEP_HBD_1", "PEP_HBD_1_N"])

elif row["atom_name"] == "C": # Peptide bond atoms

molecule_pca.append([index, "AR", "PEP_AR_1", "PEP_AR_1_C"])

# Cast list of lists to DataFrame
Expand Down Expand Up @@ -512,7 +505,6 @@ def _get_pc(self, molecule_df):
molecule_pc = []

for _, group in molecule_pca_df.groupby(["subst_name", "pc_id"], sort=False):

if len(group) == 1: # If pseudocenter only contains one atom, keep data
row = group.iloc[0].copy()
row["atom_id"] = [row["atom_id"]]
Expand Down Expand Up @@ -570,7 +562,6 @@ class Coordinates:
"""

def __init__(self):

self.molecule_id = ""
self.data = {
"ca": None,
Expand Down Expand Up @@ -714,7 +705,6 @@ class PhysicoChemicalProperties:
"""

def __init__(self):

self.molecule_id = ""
self.data = {
"ca": {},
Expand Down Expand Up @@ -914,7 +904,6 @@ class Subsets:
"""

def __init__(self):

self.molecule_id = ""
self.data_pseudocenter_subsets = {"pca": {}, "pc": {}}

Expand Down Expand Up @@ -1006,14 +995,12 @@ def from_representatives(self, representatives):
self.data_pseudocenter_subsets = {}

for k1 in ["pc", "pca"]:

self.data_pseudocenter_subsets[k1] = {}

repres = representatives.data[k1]

# Loop over all pseudocenter subset types
for k2 in list(set(pseudocenter_atoms["pc_type"])):

# If pseudocenter type exists in dataset, save corresponding subset, else save None
if k2 in set(repres["pc_type"]):
self.data_pseudocenter_subsets[k1][k2] = list(
Expand Down Expand Up @@ -1061,7 +1048,6 @@ class Points:
"""

def __init__(self):

self.molecule_id = ""
self.data = {"ca": {}, "pca": {}, "pc": {}}
self.data_pseudocenter_subsets = {"pc": {}, "pca": {}}
Expand Down Expand Up @@ -1221,7 +1207,6 @@ def from_properties(self, coordinates, physicochemicalproperties):
physicochemicalproperties_keys = physicochemicalproperties.data["ca"].keys()

for k1 in coordinates.data.keys():

self.data[k1] = {}

# Add points without physicochemical properties
Expand Down Expand Up @@ -1264,7 +1249,6 @@ def from_subsets(self, subsets):

# Subset: pseudocenter atoms
for k1, v1 in self.data.items(): # Representatives

# Select points keys that we want to subset, e.g. we want to subset pseudocenters but not Calpha atoms
if k1 in subsets.data_pseudocenter_subsets.keys():
self.data_pseudocenter_subsets[k1] = {}
Expand Down Expand Up @@ -1331,7 +1315,6 @@ class Shapes:
"""

def __init__(self):

self.molecule_id = ""
self.data = {"ca": {}, "pca": {}, "pc": {}}
self.data_pseudocenter_subsets = {"pc": {}, "pca": {}}
Expand Down Expand Up @@ -1875,13 +1858,16 @@ def _calc_shape_4dim_electroshape(self, points, scaling_factor=1):

# b) Add fourth dimension with maximum and minimum of points' 4th dimension
max_value_4thdim = max(points.iloc[:, [3]].values)[0]
min_value_4thdim = min(points.iloc[:, [3]].values)[0]
ref4 = c_s.append(
pd.Series([scaling_factor * max_value_4thdim], index=[points.columns[3]])
max_value_4thdim = pd.Series(
[scaling_factor * max_value_4thdim], index=[points.columns[3]]
)
ref5 = c_s.append(
pd.Series([scaling_factor * min_value_4thdim], index=[points.columns[3]])
ref4 = pd.concat([c_s, max_value_4thdim])

min_value_4thdim = min(points.iloc[:, [3]].values)[0]
min_value_4thdim = pd.Series(
[scaling_factor * min_value_4thdim], index=[points.columns[3]]
)
ref5 = pd.concat([c_s, min_value_4thdim])

# Get distances from ref4 and ref5 to all other points
dist_ref4 = self._calc_distances_to_point(points, ref4)
Expand Down Expand Up @@ -2285,7 +2271,6 @@ def process_encoding(molecule_path, output_dir, remove_solvent=False):

# Iterate over all binding sites (molecule structure files)
for mol_counter, mol_path in enumerate(molecule_path_list, 1):

# Load binding site from molecule structure file
molecule_loader = MoleculeLoader(mol_path, remove_solvent)

Expand Down Expand Up @@ -2397,12 +2382,10 @@ def save_cgo_file(binding_site, output_path):
bs_flat_keys = [i for i in bs_flat.keys() if "ref_points" in i]

for key in bs_flat_keys:

if bs_flat[key] is None:
logger.info(f"Empty encoding for {key}.")

else:

# Get reference points (coordinates)
ref_points = bs_flat[key]

Expand All @@ -2424,7 +2407,6 @@ def save_cgo_file(binding_site, output_path):

# For each reference point, write sphere color, coordinates and size to file
for index, row in ref_points.iterrows():

# Set sphere color
sphere_color = list(sphere_colors[counter_colors])
counter_colors = counter_colors + 1
Expand Down
5 changes: 0 additions & 5 deletions ratar/similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ def get_similarity_all_against_all(encoded_molecules_path, measure="modified_man
for repres in bindingsite.shapes.data.keys():
for dim in bindingsite.shapes.data[repres].keys():
for method in bindingsite.shapes.data[repres][dim].keys():

# Set name for encoding method (representatives and method) and
# use as key for dictionary of all-against-all matrices
desc = f"{repres}_{dim}_{method}"
Expand All @@ -105,7 +104,6 @@ def get_similarity_all_against_all(encoded_molecules_path, measure="modified_man

# Load all possible binding site pairs (to construct an upper triangular matrix)
for path1, path2 in itertools.combinations(path_list, r=2):

# Load binding site pair
with open(path1, "rb") as f:
bindingsite1 = pickle.load(f)
Expand All @@ -115,7 +113,6 @@ def get_similarity_all_against_all(encoded_molecules_path, measure="modified_man
for repres in bindingsite1.shapes.data.keys():
for dim in bindingsite1.shapes.data[repres].keys():
for method in bindingsite1.shapes.data[repres][dim].keys():

# Set name for encoding method (representatives and method) and
# use as key for dictionary of all-against-all matrices
desc = f"{repres}_{dim}_{method}"
Expand Down Expand Up @@ -173,7 +170,6 @@ def get_similarity_pairs(pairs, encoded_molecules_path, measure="modified_manhat
pairwise_similarities = defaultdict(list)

for _, pair in pairs.iterrows():

path1 = Path(encoded_molecules_path.replace("%", pair[0]))
path2 = Path(encoded_molecules_path.replace("%", pair[1]))

Expand All @@ -193,7 +189,6 @@ def get_similarity_pairs(pairs, encoded_molecules_path, measure="modified_manhat
)

for (shape_key1, shape1), (_, shape2) in zip(shapes1.items(), shapes2.items()):

similarity = calculate_similarity(shape1.moments, shape2.moments, measure)
pairwise_similarities[shape_key1].append(similarity)

Expand Down
2 changes: 0 additions & 2 deletions ratar/tests/test_auxiliary.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
[(7, "ASN_HBA_1_OD1", "ASN_OD1", "ASN_HBA_1", "HBA")],
)
def test_load_pseudocenters(df_index, pc_atom_id, pc_atom_pattern, pc_id, pc_type):

pc = load_pseudocenters(remove_hbda=False)

assert pc.shape == (76, 4)
Expand All @@ -27,7 +26,6 @@ def test_load_pseudocenters(df_index, pc_atom_id, pc_atom_pattern, pc_id, pc_typ
[(7, "ASN_HBA_1_OD1", "ASN_OD1", "ASN_HBA_1", "HBA")],
)
def test_load_pseudocenters_remove_hbda(df_index, pc_atom_id, pc_atom_pattern, pc_id, pc_type):

pc = load_pseudocenters(remove_hbda=True)

assert pc.shape == (71, 4)
Expand Down
3 changes: 0 additions & 3 deletions ratar/tests/test_encoding_representatives.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ def test_from_molecule(filename, column_names, n_atoms, centroid):

@pytest.mark.parametrize("filename", [("AAK1_4wsq_altA_chainA_reduced.mol2")])
def test_get_ca_datatypes(filename):

# Load molecule
molecule_path = Path(__name__).parent / "ratar" / "tests" / "data" / filename
molecule_loader = MoleculeLoader(molecule_path)
Expand All @@ -126,7 +125,6 @@ def test_get_ca_datatypes(filename):

@pytest.mark.parametrize("filename", [("AAK1_4wsq_altA_chainA_reduced.mol2")])
def test_get_pca_datatypes(filename):

# Load molecule
molecule_path = Path(__name__).parent / "ratar" / "tests" / "data" / filename
molecule_loader = MoleculeLoader(molecule_path)
Expand Down Expand Up @@ -156,7 +154,6 @@ def test_get_pca_datatypes(filename):

@pytest.mark.parametrize("filename", [("AAK1_4wsq_altA_chainA_reduced.mol2")])
def test_get_pca_pc_datatypes(filename):

# Load molecule
molecule_path = Path(__name__).parent / "ratar" / "tests" / "data" / filename
molecule_loader = MoleculeLoader(molecule_path)
Expand Down
Loading

0 comments on commit 197d3f4

Please sign in to comment.