Merge pull request #19 from volkamerlab/update-black-formatting

Update black formatting (23.3.0) & pandas API (2.0.0)
volkamerlab · Apr 10, 2023 · 197d3f4
2 parents 1e53ef1 + 5d0b9fb
commit 197d3f4
Show file tree

Hide file tree

Showing 6 changed files with 8 additions and 58 deletions.
diff --git a/ratar/auxiliary.py b/ratar/auxiliary.py
@@ -54,7 +54,6 @@ class MoleculeLoader:
     """
 
     def __init__(self, molecule_path, remove_solvent=False):
-
         self.molecule_path = Path(molecule_path)
         self.remove_solvent = remove_solvent
         self.molecules = self._load_molecule()
@@ -127,7 +126,6 @@ def _load_mol2(self, remove_solvent=False):
         for mol2 in split_multimol2(
             str(self.molecule_path)
         ):  # biopandas not compatible with pathlib
-
             # Mol2 files can have 9 or 10 columns.
             try:  # Try 9 columns.
                 molecule = PandasMol2().read_mol2_from_list(
@@ -311,7 +309,6 @@ class AminoAcidDescriptors:
     """
 
     def __init__(self):
-
         zscales_path = ratar_path / "data" / "zscales.csv"
         self.zscales = pd.read_csv(zscales_path, index_col="aa3")
 
@@ -416,7 +413,6 @@ def _preprocess_pseudocenters():
     id_suffix = 1
 
     for index, row in pc_df.iterrows():
-
         # Create prefix of pseudocenter ID
         id_prefix_new = f'{row["residue"]}_{row["pc_type"]}'
 

diff --git a/ratar/encoding.py b/ratar/encoding.py
@@ -57,7 +57,6 @@ class BindingSite:
     """
 
     def __init__(self):
-
         self.molecule = None
         self.representatives = None
         self.shapes = None
@@ -306,7 +305,6 @@ class Representatives:
     """
 
     def __init__(self):
-
         self.molecule_id = ""
         self.data = {
             "ca": pd.DataFrame(),
@@ -446,11 +444,9 @@ def _get_pca(molecule_df):
         molecule_pca = []
 
         for index, row in molecule_df_copy.iterrows():
-
             query = f'{row["res_name"]}_{row["atom_name"]}'
 
             if query in list(pseudocenter_atoms["pc_atom_pattern"]):  # Non-peptide bond atoms
-
                 pc_ix = pseudocenter_atoms.index[
                     pseudocenter_atoms["pc_atom_pattern"] == query
                 ].tolist()[0]
@@ -465,15 +461,12 @@ def _get_pca(molecule_df):
                 )
 
             elif row["atom_name"] == "O":  # Peptide bond atoms
-
                 molecule_pca.append([index, "HBA", "PEP_HBA_1", "PEP_HBA_1_0"])
 
             elif row["atom_name"] == "N":  # Peptide bond atoms
-
                 molecule_pca.append([index, "HBD", "PEP_HBD_1", "PEP_HBD_1_N"])
 
             elif row["atom_name"] == "C":  # Peptide bond atoms
-
                 molecule_pca.append([index, "AR", "PEP_AR_1", "PEP_AR_1_C"])
 
         # Cast list of lists to DataFrame
@@ -512,7 +505,6 @@ def _get_pc(self, molecule_df):
         molecule_pc = []
 
         for _, group in molecule_pca_df.groupby(["subst_name", "pc_id"], sort=False):
-
             if len(group) == 1:  # If pseudocenter only contains one atom, keep data
                 row = group.iloc[0].copy()
                 row["atom_id"] = [row["atom_id"]]
@@ -570,7 +562,6 @@ class Coordinates:
     """
 
     def __init__(self):
-
         self.molecule_id = ""
         self.data = {
             "ca": None,
@@ -714,7 +705,6 @@ class PhysicoChemicalProperties:
     """
 
     def __init__(self):
-
         self.molecule_id = ""
         self.data = {
             "ca": {},
@@ -914,7 +904,6 @@ class Subsets:
     """
 
     def __init__(self):
-
         self.molecule_id = ""
         self.data_pseudocenter_subsets = {"pca": {}, "pc": {}}
 
@@ -1006,14 +995,12 @@ def from_representatives(self, representatives):
         self.data_pseudocenter_subsets = {}
 
         for k1 in ["pc", "pca"]:
-
             self.data_pseudocenter_subsets[k1] = {}
 
             repres = representatives.data[k1]
 
             # Loop over all pseudocenter subset types
             for k2 in list(set(pseudocenter_atoms["pc_type"])):
-
                 # If pseudocenter type exists in dataset, save corresponding subset, else save None
                 if k2 in set(repres["pc_type"]):
                     self.data_pseudocenter_subsets[k1][k2] = list(
@@ -1061,7 +1048,6 @@ class Points:
     """
 
     def __init__(self):
-
         self.molecule_id = ""
         self.data = {"ca": {}, "pca": {}, "pc": {}}
         self.data_pseudocenter_subsets = {"pc": {}, "pca": {}}
@@ -1221,7 +1207,6 @@ def from_properties(self, coordinates, physicochemicalproperties):
         physicochemicalproperties_keys = physicochemicalproperties.data["ca"].keys()
 
         for k1 in coordinates.data.keys():
-
             self.data[k1] = {}
 
             # Add points without physicochemical properties
@@ -1264,7 +1249,6 @@ def from_subsets(self, subsets):
 
         # Subset: pseudocenter atoms
         for k1, v1 in self.data.items():  # Representatives
-
             # Select points keys that we want to subset, e.g. we want to subset pseudocenters but not Calpha atoms
             if k1 in subsets.data_pseudocenter_subsets.keys():
                 self.data_pseudocenter_subsets[k1] = {}
@@ -1331,7 +1315,6 @@ class Shapes:
     """
 
     def __init__(self):
-
         self.molecule_id = ""
         self.data = {"ca": {}, "pca": {}, "pc": {}}
         self.data_pseudocenter_subsets = {"pc": {}, "pca": {}}
@@ -1875,13 +1858,16 @@ def _calc_shape_4dim_electroshape(self, points, scaling_factor=1):
 
         # b) Add forth dimension with maximum and minmum of points' 4th dimension
         max_value_4thdim = max(points.iloc[:, [3]].values)[0]
-        min_value_4thdim = min(points.iloc[:, [3]].values)[0]
-        ref4 = c_s.append(
-            pd.Series([scaling_factor * max_value_4thdim], index=[points.columns[3]])
+        max_value_4thdim = pd.Series(
+            [scaling_factor * max_value_4thdim], index=[points.columns[3]]
         )
-        ref5 = c_s.append(
-            pd.Series([scaling_factor * min_value_4thdim], index=[points.columns[3]])
+        ref4 = pd.concat([c_s, max_value_4thdim])
+
+        min_value_4thdim = min(points.iloc[:, [3]].values)[0]
+        min_value_4thdim = pd.Series(
+            [scaling_factor * min_value_4thdim], index=[points.columns[3]]
         )
+        ref5 = pd.concat([c_s, min_value_4thdim])
 
         # Get distances from ref4 and ref5 to all other points
         dist_ref4 = self._calc_distances_to_point(points, ref4)
@@ -2285,7 +2271,6 @@ def process_encoding(molecule_path, output_dir, remove_solvent=False):
 
     # Iterate over all binding sites (molecule structure files)
     for mol_counter, mol_path in enumerate(molecule_path_list, 1):
-
         # Load binding site from molecule structure file
         molecule_loader = MoleculeLoader(mol_path, remove_solvent)
 
@@ -2397,12 +2382,10 @@ def save_cgo_file(binding_site, output_path):
     bs_flat_keys = [i for i in bs_flat.keys() if "ref_points" in i]
 
     for key in bs_flat_keys:
-
         if bs_flat[key] is None:
             logger.info(f"Empty encoding for {key}.")
 
         else:
-
             # Get reference points (coordinates)
             ref_points = bs_flat[key]
 
@@ -2424,7 +2407,6 @@ def save_cgo_file(binding_site, output_path):
 
             # For each reference point, write sphere color, coordinates and size to file
             for index, row in ref_points.iterrows():
-
                 # Set sphere color
                 sphere_color = list(sphere_colors[counter_colors])
                 counter_colors = counter_colors + 1

diff --git a/ratar/similarity.py b/ratar/similarity.py
@@ -95,7 +95,6 @@ def get_similarity_all_against_all(encoded_molecules_path, measure="modified_man
     for repres in bindingsite.shapes.data.keys():
         for dim in bindingsite.shapes.data[repres].keys():
             for method in bindingsite.shapes.data[repres][dim].keys():
-
                 # Set name for encoding method (representatives and method) and
                 # use as key for dictionary of all-against-all matrices
                 desc = f"{repres}_{dim}_{method}"
@@ -105,7 +104,6 @@ def get_similarity_all_against_all(encoded_molecules_path, measure="modified_man
 
     # Load all possible binding site pairs (to construct an upper triangular matrix)
     for path1, path2 in itertools.combinations(path_list, r=2):
-
         # Load binding site pair
         with open(path1, "rb") as f:
             bindingsite1 = pickle.load(f)
@@ -115,7 +113,6 @@ def get_similarity_all_against_all(encoded_molecules_path, measure="modified_man
         for repres in bindingsite1.shapes.data.keys():
             for dim in bindingsite1.shapes.data[repres].keys():
                 for method in bindingsite1.shapes.data[repres][dim].keys():
-
                     # Set name for encoding method (representatives and method) and
                     # use as key for dictionary of all-against-all matrices
                     desc = f"{repres}_{dim}_{method}"
@@ -173,7 +170,6 @@ def get_similarity_pairs(pairs, encoded_molecules_path, measure="modified_manhat
     pairwise_similarities = defaultdict(list)
 
     for _, pair in pairs.iterrows():
-
         path1 = Path(encoded_molecules_path.replace("%", pair[0]))
         path2 = Path(encoded_molecules_path.replace("%", pair[1]))
 
@@ -193,7 +189,6 @@ def get_similarity_pairs(pairs, encoded_molecules_path, measure="modified_manhat
             )
 
         for (shape_key1, shape1), (_, shape2) in zip(shapes1.items(), shapes2.items()):
-
             similarity = calculate_similarity(shape1.moments, shape2.moments, measure)
             pairwise_similarities[shape_key1].append(similarity)
 

diff --git a/ratar/tests/test_auxiliary.py b/ratar/tests/test_auxiliary.py
@@ -12,7 +12,6 @@
     [(7, "ASN_HBA_1_OD1", "ASN_OD1", "ASN_HBA_1", "HBA")],
 )
 def test_load_pseudocenters(df_index, pc_atom_id, pc_atom_pattern, pc_id, pc_type):
-
     pc = load_pseudocenters(remove_hbda=False)
 
     assert pc.shape == (76, 4)
@@ -27,7 +26,6 @@ def test_load_pseudocenters(df_index, pc_atom_id, pc_atom_pattern, pc_id, pc_typ
     [(7, "ASN_HBA_1_OD1", "ASN_OD1", "ASN_HBA_1", "HBA")],
 )
 def test_load_pseudocenters_remove_hbda(df_index, pc_atom_id, pc_atom_pattern, pc_id, pc_type):
-
     pc = load_pseudocenters(remove_hbda=True)
 
     assert pc.shape == (71, 4)

diff --git a/ratar/tests/test_encoding_representatives.py b/ratar/tests/test_encoding_representatives.py
@@ -99,7 +99,6 @@ def test_from_molecule(filename, column_names, n_atoms, centroid):
 
 @pytest.mark.parametrize("filename", [("AAK1_4wsq_altA_chainA_reduced.mol2")])
 def test_get_ca_datatypes(filename):
-
     # Load molecule
     molecule_path = Path(__name__).parent / "ratar" / "tests" / "data" / filename
     molecule_loader = MoleculeLoader(molecule_path)
@@ -126,7 +125,6 @@ def test_get_ca_datatypes(filename):
 
 @pytest.mark.parametrize("filename", [("AAK1_4wsq_altA_chainA_reduced.mol2")])
 def test_get_pca_datatypes(filename):
-
     # Load molecule
     molecule_path = Path(__name__).parent / "ratar" / "tests" / "data" / filename
     molecule_loader = MoleculeLoader(molecule_path)
@@ -156,7 +154,6 @@ def test_get_pca_datatypes(filename):
 
 @pytest.mark.parametrize("filename", [("AAK1_4wsq_altA_chainA_reduced.mol2")])
 def test_get_pca_pc_datatypes(filename):
-
     # Load molecule
     molecule_path = Path(__name__).parent / "ratar" / "tests" / "data" / filename
     molecule_loader = MoleculeLoader(molecule_path)