From b72cd815ba0f90639841e01033d0115a8892d011 Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Sat, 9 Nov 2024 22:32:04 +0100
Subject: [PATCH 01/30] wrapper4insty is added as calcSignatureInteractions()

---
 prody/proteins/interactions.py | 287 ++++++++++++++++++++++++++++++++-
 1 file changed, 286 insertions(+), 1 deletion(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index e6918a6cf..c741b426c 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -32,6 +32,8 @@
 from prody.ensemble import Ensemble
 
 import multiprocessing
+import matplotlib.pylab as plt
+import os
 
 __all__ = ['calcHydrogenBonds', 'calcChHydrogenBonds', 'calcSaltBridges',
            'calcRepulsiveIonicBonding', 'calcPiStacking', 'calcPiCation',
@@ -47,7 +49,7 @@
            'Interactions', 'InteractionsTrajectory', 'LigandInteractionsTrajectory',
            'calcSminaBindingAffinity', 'calcSminaPerAtomInteractions', 'calcSminaTermValues',
            'showSminaTermValues', 'showPairEnergy', 'checkNonstandardResidues',
-           'saveInteractionsAsDummyAtoms']
+           'saveInteractionsAsDummyAtoms', 'calcSignatureInteractions']
 
 
 def cleanNumbers(listContacts):
@@ -308,6 +310,233 @@ def showPairEnergy(data, **kwargs):
     return data
 
 
+# SignatureInteractions supporting functions
+from Bio.PDB.Polypeptide import three_to_one
+import shutil
+import importlib.util
+
+def remove_empty_strings(row):
+    """Remove empty strings from a list."""
+    return [elem for elem in row if elem != '']
+
+def log_message(message, level="INFO"):
+    """Log a message with a specified log level."""
+    print(f"[{level}] {message}")
+
+def is_module_installed(module_name):
+    """Check if a Python module is installed."""
+    spec = importlib.util.find_spec(module_name)
+    return spec is not None
+
+def is_command_installed(command):
+    """Check if a command-line tool is installed."""
+    return shutil.which(command) is not None
+
+def load_residues_from_pdb(pdb_file):
+    """Extract residue numbers and their corresponding one-letter amino acid codes from a PDB file."""
+    structure = parsePDB(pdb_file)
+    residues = structure.iterResidues()
+    residue_dict = {}
+
+    for res in residues:
+        resnum = res.getResnum()
+        resname = res.getResname()  # Three-letter amino acid code
+        try:
+            one_letter_code = three_to_one(resname)  # Convert to one-letter code
+            residue_dict[resnum] = one_letter_code
+        except KeyError:
+            log_message(f"Unknown residue: {resname} at position {resnum}", "WARNING")
+
+    return residue_dict
+
+def append_residue_code(residue_num, residue_dict):
+    """Return a string with one-letter amino acid code and residue number."""
+    aa_code = residue_dict.get(residue_num, "X")  # Use "X" for unknown residues
+    return f"{aa_code}{residue_num}"
+
+def process_data(mapping_file, pdb_folder, interaction_func, bond_type, fixer):
+    """Process the mapping file and the PDB folder to compute interaction counts and percentages."""
+    log_message(f"Loading mapping file: {mapping_file}")
+
+    # Load and clean the mapping file
+    try:
+        mapping = np.loadtxt(mapping_file, delimiter=' ', dtype=str)
+    except Exception as e:
+        log_message(f"Error loading mapping file: {e}", "ERROR")
+        return None
+
+    mapping = np.where(mapping == '-', np.nan, mapping)
+    filtered_mapping = np.array([remove_empty_strings(row) for row in mapping])
+    mapping_num = filtered_mapping.astype(float)
+
+    # Load the one-letter amino acid codes from model1.pdb
+    pdb_model_path = os.path.join(pdb_folder, 'model1.pdb')
+    residue_dict = load_residues_from_pdb(pdb_model_path)
+
+    log_message(f"Processing PDB files in folder: {pdb_folder}")
+
+    tar_bond_ind = []
+    processed_files = 0  # To track the number of files successfully processed
+    fixed_files = []  # Store paths of fixed files to remove at the end
+
+    for i, files in enumerate(os.listdir(pdb_folder)):
+        # Skip any file that has already been processed (files with 'addH_' prefix)
+        if files.startswith('addH_'):
+            log_message(f"Skipping already fixed file: {files}", "INFO")
+            continue
+
+        log_message(f"Processing file {i+1}: {files}")
+
+        pdb_file_path = os.path.join(pdb_folder, files)
+        fixed_pdb_path = pdb_file_path.replace(files, 'addH_' + files)
+
+        # Check if the fixed file already exists, skip fixing if it does
+        if not os.path.exists(fixed_pdb_path):
+            try:
+                log_message(f"Running fixer on {pdb_file_path} using {fixer}.")
+                addMissingAtoms(pdb_file_path, method=fixer)
+            except Exception as e:
+                log_message(f"Error adding missing atoms: {e}", "ERROR")
+                continue
+        else:
+            log_message(f"Using existing fixed file: {fixed_pdb_path}")
+
+        try:
+            coords = parsePDB(fixed_pdb_path)
+            atoms = coords.select('protein')
+            interactions = Interactions()
+            bonds = interaction_func(atoms)
+        except Exception as e:
+            log_message(f"Error processing PDB file {files}: {e}", "ERROR")
+            continue
+
+        # If no bonds were found, skip this file
+        if len(bonds) == 0:
+            log_message(f"No {bond_type} found in file {files}, skipping.", "WARNING")
+            continue
+
+        processed_files += 1  # Increment successfully processed files
+        fixed_files.append(fixed_pdb_path)
+
+        if processed_files == 1:  # First valid file with bonds determines target indices
+            for entries in bonds:
+                ent = list(np.sort((int(entries[0][3:]), int(entries[3][3:]))))  # Ensure integers
+                tar_bond_ind.append(ent)
+            tar_bond_ind = np.unique(np.array(tar_bond_ind), axis=0).astype(int)
+            count = np.zeros(tar_bond_ind.shape[0], dtype=int)
+
+        bond_ind = []
+        for entries in bonds:
+            ent = list(np.sort((int(entries[0][3:]), int(entries[3][3:]))))  # Ensure integers
+            bond_ind.append(ent)
+        bond_ind = np.unique(np.array(bond_ind), axis=0)
+
+        # Ensure bond_ind is a 2D array
+        if bond_ind.ndim == 1:
+            bond_ind = bond_ind.reshape(-1, 2)  # Reshape to (n, 2) if it's 1D
+
+        for j, pairs in enumerate(tar_bond_ind):
+            ind1_matches = np.where(mapping_num[0] == pairs[0])[0]
+            ind2_matches = np.where(mapping_num[0] == pairs[1])[0]
+
+            if ind1_matches.size > 0 and ind2_matches.size > 0:
+                if processed_files - 1 < mapping_num.shape[0]:
+                    ind1 = mapping_num[processed_files - 1, ind1_matches[0]]
+                    ind2 = mapping_num[processed_files - 1, ind2_matches[0]]
+
+                    if not (np.isnan(ind1) or np.isnan(ind2)):
+                        index = np.where(np.logical_and(bond_ind[:, 0] == int(ind1), bond_ind[:, 1] == int(ind2)))[0]
+                        if index.size != 0:
+                            count[j] += 1
+                else:
+                    log_message(f"Skipping file {files} due to index out of bounds error", "WARNING")
+            else:
+                log_message(f"No matching indices found for {pairs} in {files}", "WARNING")
+
+    # If no files were successfully processed or no bonds were found
+    if processed_files == 0 or len(tar_bond_ind) == 0:
+        log_message(f"No valid {bond_type} entries found across all PDB files.", "ERROR")
+        return None
+
+    count_reshaped = count.reshape(-1, 1)
+    count_normalized = (count / processed_files * 100).reshape(-1, 1)
+
+    # Modify tar_bond_ind to append the amino acid code before residue index
+    tar_bond_with_aa = []
+    for bond in tar_bond_ind:
+        res1 = append_residue_code(bond[0], residue_dict)  # Append AA code for Res1
+        res2 = append_residue_code(bond[1], residue_dict)  # Append AA code for Res2
+        tar_bond_with_aa.append([res1, res2])
+
+    tar_bond_with_aa = np.array(tar_bond_with_aa)
+
+    # Combine tar_bond_with_aa with count and percentage
+    output_data = np.hstack((tar_bond_with_aa, count_reshaped, count_normalized))
+
+    log_message(f"Finished processing {processed_files} PDB files.")
+    output_filename = f'{bond_type}_consensus.txt'
+
+    # Save the result with amino acid codes and numeric values
+    np.savetxt(output_filename, output_data, fmt='%s %s %s %s', delimiter=' ', 
+           header='Res1 Res2 Count Percentage', comments='')
+
+    return output_data, fixed_files  # Return fixed_files to remove later
+
+
+def plot_barh(result, bond_type, n_per_plot=None, min_height=8):
+    """Plot horizontal bar plots of percentages, splitting the data into fixed-sized plots."""
+    plt.rcParams.update({'font.size': 20})
+
+    # Set default value for n_per_plot if None is passed
+    if n_per_plot is None:
+        n_per_plot = 20
+
+    # Error handling if result is None or empty
+    if result is None or len(result) == 0:
+        log_message(f"Skipping plot for {bond_type} due to insufficient data.", "ERROR")
+        return
+
+    num_entries = result.shape[0]
+    num_plots = (num_entries + n_per_plot - 1) // n_per_plot  # Number of plots required
+
+    for plot_idx in range(num_plots):
+        # Slice the data for the current plot (take the next 'n_per_plot' entries)
+        start_idx = plot_idx * n_per_plot
+        end_idx = min((plot_idx + 1) * n_per_plot, num_entries)
+        result_chunk = result[start_idx:end_idx]
+
+        log_message(f"Plotting entries {start_idx+1} to {end_idx} for {bond_type}.")
+
+        # Use residue numbers for y-axis labels
+        y_labels = [f"{str(row[0])}-{str(row[1])}" for row in result_chunk]
+        percentage_values = result_chunk[:, 3].astype('float')
+
+        norm = plt.Normalize(vmin=0, vmax=100)
+        cmap = plt.cm.get_cmap('coolwarm')
+
+        # Set the figure height with a minimum height of `min_height` for small datasets
+        fig_height = max(min_height, len(result_chunk) * 0.4)
+        plt.figure(figsize=(18, fig_height))
+        bars = plt.barh(y_labels, percentage_values, color=cmap(norm(percentage_values)))
+
+        sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
+        sm.set_array([])
+
+        # Adjust color bar height to match the number of entries
+        plt.colorbar(sm, label='Percentage', fraction=0.02, pad=0.04)
+
+        plt.ylim(-1, result_chunk.shape[0])
+        plt.ylabel(f'{bond_type} Pairs of Residue Numbers')
+        plt.xlabel('Percentage')
+        plt.title(f'Persistence of {bond_type} Bonds (entries {start_idx+1}-{end_idx})')
+
+        # Save each plot with an incremented filename for multiple plots
+        output_plot_file = f'{bond_type}_plot_part{plot_idx + 1}.png'
+        log_message(f"Saving plot to: {output_plot_file}")
+        plt.savefig(output_plot_file)
+        plt.close()
+        log_message(f"Plot saved successfully.")
+
 
 def calcHydrophobicOverlapingAreas(atoms, **kwargs):
     """Provide information about hydrophobic contacts between pairs of residues based on 
@@ -3010,6 +3239,62 @@ def showSminaTermValues(data):
     return show
 
 
+def calcSignatureInteractions(mapping_file, PDB_folder, fixer='pdbfixer'):
+    """Analyzes protein structures to identify various interactions using InSty. 
+    Processes data from the MSA file and folder with selected models.
+    
+    :arg mapping_file: Aligned residue indices, MSA file type
+    :type mapping_file: str
+
+    :arg PDB_folder: Directory containing PDB model files
+    :type PDB_folder: str
+
+    :arg fixer: The method for fixing lack of hydrogen bonds
+    :type fixer: 'pdbfixer' or 'openbabel'
+    """
+    
+    import os
+    functions = {
+        "HydrogenBonds": calcHydrogenBonds,
+        "SaltBridges": calcSaltBridges,
+        "RepulsiveIonicBonding": calcRepulsiveIonicBonding,
+        "PiStacking": calcPiStacking,
+        "PiCation": calcPiCation,
+        "Hydrophobic": calcHydrophobic,
+        "DisulfideBonds": calcDisulfideBonds
+    }
+
+    # Process each bond type
+    for bond_type, func in functions.items():
+        # Check if the consensus file already exists
+        #consensus_file = f'{bond_type}_consensus.txt'
+        #if os.path.exists(consensus_file):
+        #    log_message(f"Consensus file for {bond_type} already exists, skipping.", "INFO")
+        #    continue
+
+        log_message(f"Processing {bond_type}")
+        
+        result = process_data(mapping_file, PDB_folder, func, bond_type, fixer)
+
+        # Check if the result is None (no valid bonds found)
+        if result is None:
+            log_message(f"No valid {bond_type} entries found, skipping further processing.", "WARNING")
+            continue
+
+        result, fixed_files = result
+
+        # Proceed with plotting
+        n = None
+        plot_barh(result, bond_type, n)
+
+    # Remove all fixed files at the end
+    if 'fixed_files' in locals():
+        for fixed_file in fixed_files:
+            if os.path.exists(fixed_file):
+                os.remove(fixed_file)
+                log_message(f"Removed fixed file: {fixed_file}")
+
+
 
 class Interactions(object):
 

From 4505312a13bfe4d2ef5fe7adbf637dc991a90abe Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Sat, 9 Nov 2024 23:01:05 +0100
Subject: [PATCH 02/30] fstrings removed

---
 prody/proteins/interactions.py | 58 ++++++++++++++++------------------
 1 file changed, 28 insertions(+), 30 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index c741b426c..2c47b9753 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -321,7 +321,7 @@ def remove_empty_strings(row):
 
 def log_message(message, level="INFO"):
     """Log a message with a specified log level."""
-    print(f"[{level}] {message}")
+    print("[{}] {}".format(level, message))
 
 def is_module_installed(module_name):
     """Check if a Python module is installed."""
@@ -345,24 +345,24 @@ def load_residues_from_pdb(pdb_file):
             one_letter_code = three_to_one(resname)  # Convert to one-letter code
             residue_dict[resnum] = one_letter_code
         except KeyError:
-            log_message(f"Unknown residue: {resname} at position {resnum}", "WARNING")
-
+            log_message("Unknown residue: {} at position {}".format(resname, resnum), "WARNING")
+            
     return residue_dict
 
 def append_residue_code(residue_num, residue_dict):
     """Return a string with one-letter amino acid code and residue number."""
     aa_code = residue_dict.get(residue_num, "X")  # Use "X" for unknown residues
-    return f"{aa_code}{residue_num}"
+    return "{}{}".format(aa_code, residue_num)
 
 def process_data(mapping_file, pdb_folder, interaction_func, bond_type, fixer):
     """Process the mapping file and the PDB folder to compute interaction counts and percentages."""
-    log_message(f"Loading mapping file: {mapping_file}")
+    log_message("Loading mapping file: {}".format(mapping_file))
 
     # Load and clean the mapping file
     try:
         mapping = np.loadtxt(mapping_file, delimiter=' ', dtype=str)
     except Exception as e:
-        log_message(f"Error loading mapping file: {e}", "ERROR")
+        log_message("Error loading mapping file: {}".format(e), "ERROR")
         return None
 
     mapping = np.where(mapping == '-', np.nan, mapping)
@@ -373,7 +373,7 @@ def process_data(mapping_file, pdb_folder, interaction_func, bond_type, fixer):
     pdb_model_path = os.path.join(pdb_folder, 'model1.pdb')
     residue_dict = load_residues_from_pdb(pdb_model_path)
 
-    log_message(f"Processing PDB files in folder: {pdb_folder}")
+    log_message("Processing PDB files in folder: {}".format(pdb_folder))
 
     tar_bond_ind = []
     processed_files = 0  # To track the number of files successfully processed
@@ -382,24 +382,24 @@ def process_data(mapping_file, pdb_folder, interaction_func, bond_type, fixer):
     for i, files in enumerate(os.listdir(pdb_folder)):
         # Skip any file that has already been processed (files with 'addH_' prefix)
         if files.startswith('addH_'):
-            log_message(f"Skipping already fixed file: {files}", "INFO")
+            log_message("Skipping already fixed file: {}".format(files), "INFO")
             continue
 
-        log_message(f"Processing file {i+1}: {files}")
-
+        log_message("Processing file {}: {}".format(i + 1, files))
+        
         pdb_file_path = os.path.join(pdb_folder, files)
         fixed_pdb_path = pdb_file_path.replace(files, 'addH_' + files)
 
         # Check if the fixed file already exists, skip fixing if it does
         if not os.path.exists(fixed_pdb_path):
             try:
-                log_message(f"Running fixer on {pdb_file_path} using {fixer}.")
+                log_message("Running fixer on {} using {}.".format(pdb_file_path, fixer))
                 addMissingAtoms(pdb_file_path, method=fixer)
             except Exception as e:
-                log_message(f"Error adding missing atoms: {e}", "ERROR")
+                log_message("Error adding missing atoms: {}".format(e), "ERROR")
                 continue
         else:
-            log_message(f"Using existing fixed file: {fixed_pdb_path}")
+            log_message("Using existing fixed file: {}".format(fixed_pdb_path))
 
         try:
             coords = parsePDB(fixed_pdb_path)
@@ -407,12 +407,12 @@ def process_data(mapping_file, pdb_folder, interaction_func, bond_type, fixer):
             interactions = Interactions()
             bonds = interaction_func(atoms)
         except Exception as e:
-            log_message(f"Error processing PDB file {files}: {e}", "ERROR")
+            log_message("Error processing PDB file {}: {}".format(files, e), "ERROR")
             continue
 
         # If no bonds were found, skip this file
         if len(bonds) == 0:
-            log_message(f"No {bond_type} found in file {files}, skipping.", "WARNING")
+            log_message("No {} found in file {}, skipping.".format(bond_type, files), "WARNING")
             continue
 
         processed_files += 1  # Increment successfully processed files
@@ -449,13 +449,13 @@ def process_data(mapping_file, pdb_folder, interaction_func, bond_type, fixer):
                         if index.size != 0:
                             count[j] += 1
                 else:
-                    log_message(f"Skipping file {files} due to index out of bounds error", "WARNING")
+                    log_message("Skipping file {} due to index out of bounds error".format(files), "WARNING")
             else:
-                log_message(f"No matching indices found for {pairs} in {files}", "WARNING")
+                log_message("No matching indices found for {} in {}".format(pairs, files), "WARNING")
 
     # If no files were successfully processed or no bonds were found
     if processed_files == 0 or len(tar_bond_ind) == 0:
-        log_message(f"No valid {bond_type} entries found across all PDB files.", "ERROR")
+        log_message("No valid {} entries found across all PDB files.".format(bond_type), "ERROR")
         return None
 
     count_reshaped = count.reshape(-1, 1)
@@ -473,8 +473,8 @@ def process_data(mapping_file, pdb_folder, interaction_func, bond_type, fixer):
     # Combine tar_bond_with_aa with count and percentage
     output_data = np.hstack((tar_bond_with_aa, count_reshaped, count_normalized))
 
-    log_message(f"Finished processing {processed_files} PDB files.")
-    output_filename = f'{bond_type}_consensus.txt'
+    log_message("Finished processing {} PDB files.".format(processed_files))
+    output_filename = '{}_consensus.txt'.format(bond_type)
 
     # Save the result with amino acid codes and numeric values
     np.savetxt(output_filename, output_data, fmt='%s %s %s %s', delimiter=' ', 
@@ -493,7 +493,7 @@ def plot_barh(result, bond_type, n_per_plot=None, min_height=8):
 
     # Error handling if result is None or empty
     if result is None or len(result) == 0:
-        log_message(f"Skipping plot for {bond_type} due to insufficient data.", "ERROR")
+        log_message("Skipping plot for {} due to insufficient data.".format(bond_type), "ERROR")
         return
 
     num_entries = result.shape[0]
@@ -505,10 +505,9 @@ def plot_barh(result, bond_type, n_per_plot=None, min_height=8):
         end_idx = min((plot_idx + 1) * n_per_plot, num_entries)
         result_chunk = result[start_idx:end_idx]
 
-        log_message(f"Plotting entries {start_idx+1} to {end_idx} for {bond_type}.")
-
+        log_message("Plotting entries {} to {} for {}.".format(start_idx + 1, end_idx, bond_type))
         # Use residue numbers for y-axis labels
-        y_labels = [f"{str(row[0])}-{str(row[1])}" for row in result_chunk]
+        y_labels = ["{}-{}".format(str(row[0]), str(row[1])) for row in result_chunk]
         percentage_values = result_chunk[:, 3].astype('float')
 
         norm = plt.Normalize(vmin=0, vmax=100)
@@ -526,16 +525,15 @@ def plot_barh(result, bond_type, n_per_plot=None, min_height=8):
         plt.colorbar(sm, label='Percentage', fraction=0.02, pad=0.04)
 
         plt.ylim(-1, result_chunk.shape[0])
-        plt.ylabel(f'{bond_type} Pairs of Residue Numbers')
+        plt.ylabel('{} Pairs of Residue Numbers'.format(bond_type))
         plt.xlabel('Percentage')
-        plt.title(f'Persistence of {bond_type} Bonds (entries {start_idx+1}-{end_idx})')
-
+        plt.title('Persistence of {} Bonds (entries {}-{})'.format(bond_type, start_idx + 1, end_idx))
         # Save each plot with an incremented filename for multiple plots
-        output_plot_file = f'{bond_type}_plot_part{plot_idx + 1}.png'
-        log_message(f"Saving plot to: {output_plot_file}")
+        output_plot_file = '{}_plot_part{}.png'.format(bond_type, plot_idx + 1)
+        log_message("Saving plot to: {}".format(output_plot_file))
         plt.savefig(output_plot_file)
         plt.close()
-        log_message(f"Plot saved successfully.")
+        log_message("Plot saved successfully.")
 
 
 def calcHydrophobicOverlapingAreas(atoms, **kwargs):

From b8cdafca125e70015badbb8cd7fcb5b664fb5abb Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Sat, 9 Nov 2024 23:08:05 +0100
Subject: [PATCH 03/30] replacing additional fstrings

---
 prody/proteins/interactions.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index 2c47b9753..7f66f841b 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -3265,18 +3265,17 @@ def calcSignatureInteractions(mapping_file, PDB_folder, fixer='pdbfixer'):
     # Process each bond type
     for bond_type, func in functions.items():
         # Check if the consensus file already exists
-        #consensus_file = f'{bond_type}_consensus.txt'
-        #if os.path.exists(consensus_file):
-        #    log_message(f"Consensus file for {bond_type} already exists, skipping.", "INFO")
-        #    continue
+        consensus_file = '{}_consensus.txt'.format(bond_type)
+        if os.path.exists(consensus_file):
+            log_message("Consensus file for {} already exists, skipping.".format(bond_type), "INFO")
+            continue
 
-        log_message(f"Processing {bond_type}")
-        
+        log_message("Processing {}".format(bond_type))
         result = process_data(mapping_file, PDB_folder, func, bond_type, fixer)
 
         # Check if the result is None (no valid bonds found)
         if result is None:
-            log_message(f"No valid {bond_type} entries found, skipping further processing.", "WARNING")
+            log_message("No valid {} entries found, skipping further processing.".format(bond_type), "WARNING")
             continue
 
         result, fixed_files = result
@@ -3290,7 +3289,7 @@ def calcSignatureInteractions(mapping_file, PDB_folder, fixer='pdbfixer'):
         for fixed_file in fixed_files:
             if os.path.exists(fixed_file):
                 os.remove(fixed_file)
-                log_message(f"Removed fixed file: {fixed_file}")
+                log_message("Removed fixed file: {}".format(fixed_file))
 
 
 

From 6345be8d149686550f56fbec9028b12d8b6918c0 Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Sat, 9 Nov 2024 23:53:39 +0100
Subject: [PATCH 04/30] extractMultiModelPDB

---
 prody/proteins/interactions.py | 44 ++++++++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index 7f66f841b..12257cce7 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -3237,9 +3237,49 @@ def showSminaTermValues(data):
     return show
 
 
+def extractMultiModelPDB(multimodelPDB, folder_name):
+    """Extracts individual PDB models from multimodel PDB and places them into the pointed directory.
+    If used for calculating calcSignatureInteractions align the models.
+
+    :arg multimodelPDB: The file containing models in multi-model PDB format 
+    :type multimodelPDB: str
+    
+    :arg folder_name: The name of the folder to which PDBs will be extracted
+    :type folder_name: str
+    """
+
+    folder_name = kwargs.pop('folder_name', 'struc_homologs')
+    
+    with open(multimodelPDB, 'r') as f:
+        file = f.readlines()
+    os.makedirs(folder_name, exist_ok=True)
+
+    fp = None
+    for line in file:
+        line = line.strip()
+        sig1 = line[:5]
+        sig2 = line[:6]
+        sig3 = line[:4]
+
+        if sig1 == 'MODEL':
+            model_number = line.split()[1]
+            filename = 'model{}.pdb'.format(model_number)
+            fp = open(filename, 'w')
+            continue
+
+        if sig2 == 'ENDMDL':
+            if fp:
+                fp.close()
+            os.rename(filename, './{}/{}'.format(folder_name,filename))
+            continue
+
+        if sig3 == 'ATOM' and fp:
+            fp.write("{}\n".format(line))
+
+
 def calcSignatureInteractions(mapping_file, PDB_folder, fixer='pdbfixer'):
-    """Analyzes protein structures to identify various interactions using InSty. 
-    Processes data from the MSA file and folder with selected models.
+     """Analyzes protein structures to identify various interactions using InSty. 
+     Processes data from the MSA file and folder with selected models.
     
     :arg mapping_file: Aligned residue indices, MSA file type
     :type mapping_file: str

From f088e5a7567a5635e4f4e9d49e111ba49157542f Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Sun, 10 Nov 2024 09:17:01 +0100
Subject: [PATCH 05/30] fstrings replaced

---
 prody/proteins/interactions.py | 78 +++++++++++++++++-----------------
 1 file changed, 38 insertions(+), 40 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index c741b426c..0a49dcaa4 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -34,6 +34,8 @@
 import multiprocessing
 import matplotlib.pylab as plt
 import os
+from . import fixer
+
 
 __all__ = ['calcHydrogenBonds', 'calcChHydrogenBonds', 'calcSaltBridges',
            'calcRepulsiveIonicBonding', 'calcPiStacking', 'calcPiCation',
@@ -49,7 +51,7 @@
            'Interactions', 'InteractionsTrajectory', 'LigandInteractionsTrajectory',
            'calcSminaBindingAffinity', 'calcSminaPerAtomInteractions', 'calcSminaTermValues',
            'showSminaTermValues', 'showPairEnergy', 'checkNonstandardResidues',
-           'saveInteractionsAsDummyAtoms', 'calcSignatureInteractions']
+           'saveInteractionsAsDummyAtoms', 'extractMultiModelPDB', 'calcSignatureInteractions']
 
 
 def cleanNumbers(listContacts):
@@ -321,7 +323,7 @@ def remove_empty_strings(row):
 
 def log_message(message, level="INFO"):
     """Log a message with a specified log level."""
-    print(f"[{level}] {message}")
+    print("[{}] {}".format(level, message))
 
 def is_module_installed(module_name):
     """Check if a Python module is installed."""
@@ -345,24 +347,24 @@ def load_residues_from_pdb(pdb_file):
             one_letter_code = three_to_one(resname)  # Convert to one-letter code
             residue_dict[resnum] = one_letter_code
         except KeyError:
-            log_message(f"Unknown residue: {resname} at position {resnum}", "WARNING")
-
+            log_message("Unknown residue: {} at position {}".format(resname, resnum), "WARNING")
+            
     return residue_dict
 
 def append_residue_code(residue_num, residue_dict):
     """Return a string with one-letter amino acid code and residue number."""
     aa_code = residue_dict.get(residue_num, "X")  # Use "X" for unknown residues
-    return f"{aa_code}{residue_num}"
+    return "{}{}".format(aa_code, residue_num)
 
 def process_data(mapping_file, pdb_folder, interaction_func, bond_type, fixer):
     """Process the mapping file and the PDB folder to compute interaction counts and percentages."""
-    log_message(f"Loading mapping file: {mapping_file}")
+    log_message("Loading mapping file: {}".format(mapping_file))
 
     # Load and clean the mapping file
     try:
         mapping = np.loadtxt(mapping_file, delimiter=' ', dtype=str)
     except Exception as e:
-        log_message(f"Error loading mapping file: {e}", "ERROR")
+        log_message("Error loading mapping file: {}".format(e), "ERROR")
         return None
 
     mapping = np.where(mapping == '-', np.nan, mapping)
@@ -373,7 +375,7 @@ def process_data(mapping_file, pdb_folder, interaction_func, bond_type, fixer):
     pdb_model_path = os.path.join(pdb_folder, 'model1.pdb')
     residue_dict = load_residues_from_pdb(pdb_model_path)
 
-    log_message(f"Processing PDB files in folder: {pdb_folder}")
+    log_message("Processing PDB files in folder: {}".format(pdb_folder))
 
     tar_bond_ind = []
     processed_files = 0  # To track the number of files successfully processed
@@ -382,24 +384,24 @@ def process_data(mapping_file, pdb_folder, interaction_func, bond_type, fixer):
     for i, files in enumerate(os.listdir(pdb_folder)):
         # Skip any file that has already been processed (files with 'addH_' prefix)
         if files.startswith('addH_'):
-            log_message(f"Skipping already fixed file: {files}", "INFO")
+            log_message("Skipping already fixed file: {}".format(files), "INFO")
             continue
 
-        log_message(f"Processing file {i+1}: {files}")
-
+        log_message("Processing file {}: {}".format(i + 1, files))
+        
         pdb_file_path = os.path.join(pdb_folder, files)
         fixed_pdb_path = pdb_file_path.replace(files, 'addH_' + files)
 
         # Check if the fixed file already exists, skip fixing if it does
         if not os.path.exists(fixed_pdb_path):
             try:
-                log_message(f"Running fixer on {pdb_file_path} using {fixer}.")
+                log_message("Running fixer on {} using {}.".format(pdb_file_path, fixer))
                 addMissingAtoms(pdb_file_path, method=fixer)
             except Exception as e:
-                log_message(f"Error adding missing atoms: {e}", "ERROR")
+                log_message("Error adding missing atoms: {}".format(e), "ERROR")
                 continue
         else:
-            log_message(f"Using existing fixed file: {fixed_pdb_path}")
+            log_message("Using existing fixed file: {}".format(fixed_pdb_path))
 
         try:
             coords = parsePDB(fixed_pdb_path)
@@ -407,12 +409,12 @@ def process_data(mapping_file, pdb_folder, interaction_func, bond_type, fixer):
             interactions = Interactions()
             bonds = interaction_func(atoms)
         except Exception as e:
-            log_message(f"Error processing PDB file {files}: {e}", "ERROR")
+            log_message("Error processing PDB file {}: {}".format(files, e), "ERROR")
             continue
 
         # If no bonds were found, skip this file
         if len(bonds) == 0:
-            log_message(f"No {bond_type} found in file {files}, skipping.", "WARNING")
+            log_message("No {} found in file {}, skipping.".format(bond_type, files), "WARNING")
             continue
 
         processed_files += 1  # Increment successfully processed files
@@ -449,13 +451,13 @@ def process_data(mapping_file, pdb_folder, interaction_func, bond_type, fixer):
                         if index.size != 0:
                             count[j] += 1
                 else:
-                    log_message(f"Skipping file {files} due to index out of bounds error", "WARNING")
+                    log_message("Skipping file {} due to index out of bounds error".format(files), "WARNING")
             else:
-                log_message(f"No matching indices found for {pairs} in {files}", "WARNING")
+                log_message("No matching indices found for {} in {}".format(pairs, files), "WARNING")
 
     # If no files were successfully processed or no bonds were found
     if processed_files == 0 or len(tar_bond_ind) == 0:
-        log_message(f"No valid {bond_type} entries found across all PDB files.", "ERROR")
+        log_message("No valid {} entries found across all PDB files.".format(bond_type), "ERROR")
         return None
 
     count_reshaped = count.reshape(-1, 1)
@@ -473,8 +475,8 @@ def process_data(mapping_file, pdb_folder, interaction_func, bond_type, fixer):
     # Combine tar_bond_with_aa with count and percentage
     output_data = np.hstack((tar_bond_with_aa, count_reshaped, count_normalized))
 
-    log_message(f"Finished processing {processed_files} PDB files.")
-    output_filename = f'{bond_type}_consensus.txt'
+    log_message("Finished processing {} PDB files.".format(processed_files))
+    output_filename = '{}_consensus.txt'.format(bond_type)
 
     # Save the result with amino acid codes and numeric values
     np.savetxt(output_filename, output_data, fmt='%s %s %s %s', delimiter=' ', 
@@ -493,7 +495,7 @@ def plot_barh(result, bond_type, n_per_plot=None, min_height=8):
 
     # Error handling if result is None or empty
     if result is None or len(result) == 0:
-        log_message(f"Skipping plot for {bond_type} due to insufficient data.", "ERROR")
+        log_message("Skipping plot for {} due to insufficient data.".format(bond_type), "ERROR")
         return
 
     num_entries = result.shape[0]
@@ -505,10 +507,9 @@ def plot_barh(result, bond_type, n_per_plot=None, min_height=8):
         end_idx = min((plot_idx + 1) * n_per_plot, num_entries)
         result_chunk = result[start_idx:end_idx]
 
-        log_message(f"Plotting entries {start_idx+1} to {end_idx} for {bond_type}.")
-
+        log_message("Plotting entries {} to {} for {}.".format(start_idx + 1, end_idx, bond_type))
         # Use residue numbers for y-axis labels
-        y_labels = [f"{str(row[0])}-{str(row[1])}" for row in result_chunk]
+        y_labels = ["{}-{}".format(str(row[0]), str(row[1])) for row in result_chunk]
         percentage_values = result_chunk[:, 3].astype('float')
 
         norm = plt.Normalize(vmin=0, vmax=100)
@@ -526,16 +527,15 @@ def plot_barh(result, bond_type, n_per_plot=None, min_height=8):
         plt.colorbar(sm, label='Percentage', fraction=0.02, pad=0.04)
 
         plt.ylim(-1, result_chunk.shape[0])
-        plt.ylabel(f'{bond_type} Pairs of Residue Numbers')
+        plt.ylabel('{} Pairs of Residue Numbers'.format(bond_type))
         plt.xlabel('Percentage')
-        plt.title(f'Persistence of {bond_type} Bonds (entries {start_idx+1}-{end_idx})')
-
+        plt.title('Persistence of {} Bonds (entries {}-{})'.format(bond_type, start_idx + 1, end_idx))
         # Save each plot with an incremented filename for multiple plots
-        output_plot_file = f'{bond_type}_plot_part{plot_idx + 1}.png'
-        log_message(f"Saving plot to: {output_plot_file}")
+        output_plot_file = '{}_plot_part{}.png'.format(bond_type, plot_idx + 1)
+        log_message("Saving plot to: {}".format(output_plot_file))
         plt.savefig(output_plot_file)
         plt.close()
-        log_message(f"Plot saved successfully.")
+        log_message("Plot saved successfully.")
 
 
 def calcHydrophobicOverlapingAreas(atoms, **kwargs):
@@ -3253,7 +3253,6 @@ def calcSignatureInteractions(mapping_file, PDB_folder, fixer='pdbfixer'):
     :type fixer: 'pdbfixer' or 'openbabel'
     """
     
-    import os
     functions = {
         "HydrogenBonds": calcHydrogenBonds,
         "SaltBridges": calcSaltBridges,
@@ -3267,18 +3266,17 @@ def calcSignatureInteractions(mapping_file, PDB_folder, fixer='pdbfixer'):
     # Process each bond type
     for bond_type, func in functions.items():
         # Check if the consensus file already exists
-        #consensus_file = f'{bond_type}_consensus.txt'
-        #if os.path.exists(consensus_file):
-        #    log_message(f"Consensus file for {bond_type} already exists, skipping.", "INFO")
-        #    continue
+        consensus_file = '{}_consensus.txt'.format(bond_type)
+        if os.path.exists(consensus_file):
+            log_message("Consensus file for {} already exists, skipping.".format(bond_type), "INFO")
+            continue
 
-        log_message(f"Processing {bond_type}")
-        
+        log_message("Processing {}".format(bond_type))
         result = process_data(mapping_file, PDB_folder, func, bond_type, fixer)
 
         # Check if the result is None (no valid bonds found)
         if result is None:
-            log_message(f"No valid {bond_type} entries found, skipping further processing.", "WARNING")
+            log_message("No valid {} entries found, skipping further processing.".format(bond_type), "WARNING")
             continue
 
         result, fixed_files = result
@@ -3292,7 +3290,7 @@ def calcSignatureInteractions(mapping_file, PDB_folder, fixer='pdbfixer'):
         for fixed_file in fixed_files:
             if os.path.exists(fixed_file):
                 os.remove(fixed_file)
-                log_message(f"Removed fixed file: {fixed_file}")
+                log_message("Removed fixed file: {}".format(fixed_file))
 
 
 

From 0b8da0fa956fe0d0c206c41b721a9779e1d2bae3 Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Sun, 10 Nov 2024 09:59:03 +0100
Subject: [PATCH 06/30] fixer import

---
 prody/proteins/interactions.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index 7fd66f66d..1787022f2 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -34,7 +34,7 @@
 import multiprocessing
 import matplotlib.pylab as plt
 import os
-from . import fixer
+from .fixer import *
 
 
 __all__ = ['calcHydrogenBonds', 'calcChHydrogenBonds', 'calcSaltBridges',
@@ -3280,8 +3280,8 @@ def extractMultiModelPDB(multimodelPDB, folder_name):
 
 
 def calcSignatureInteractions(mapping_file, PDB_folder, fixer='pdbfixer'):
-     """Analyzes protein structures to identify various interactions using InSty. 
-     Processes data from the MSA file and folder with selected models.
+    """Analyzes protein structures to identify various interactions using InSty. 
+    Processes data from the MSA file and folder with selected models.
     
     :arg mapping_file: Aligned residue indices, MSA file type
     :type mapping_file: str

From 1260011205a3a7695dc16978ad7432e819383c0d Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Sun, 10 Nov 2024 17:13:28 +0100
Subject: [PATCH 07/30] extractMultiModelPDB improvements

---
 prody/proteins/interactions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index 1787022f2..428e04be2 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -3239,7 +3239,7 @@ def showSminaTermValues(data):
     return show
 
 
-def extractMultiModelPDB(multimodelPDB, folder_name):
+def extractMultiModelPDB(multimodelPDB, **kwargs):
     """Extracts individual PDB models from multimodel PDB and places them into the pointed directory.
     If used for calculating calcSignatureInteractions align the models.
 

From f2956d96bcc5a099f4a80e646e19de91ffcb7c89 Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Sun, 10 Nov 2024 17:44:15 +0100
Subject: [PATCH 08/30] createFoldseekAlignment

---
 prody/proteins/interactions.py | 70 +++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index 428e04be2..1e7c4a106 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -51,7 +51,8 @@
            'Interactions', 'InteractionsTrajectory', 'LigandInteractionsTrajectory',
            'calcSminaBindingAffinity', 'calcSminaPerAtomInteractions', 'calcSminaTermValues',
            'showSminaTermValues', 'showPairEnergy', 'checkNonstandardResidues',
-           'saveInteractionsAsDummyAtoms', 'extractMultiModelPDB', 'calcSignatureInteractions']
+           'saveInteractionsAsDummyAtoms', 'createFoldseekAlignment', 
+           'extractMultiModelPDB', 'calcSignatureInteractions']
 
 
 def cleanNumbers(listContacts):
@@ -3239,6 +3240,73 @@ def showSminaTermValues(data):
     return show
 
 
+def createFoldseekAlignment(prot_seq, prot_foldseek, *kwargs):
+    """Aligns sequences from prot_seq with homologous sequences identified in prot_foldseek, 
+    generating a multiple sequence alignment.
+    
+    :arg prot_seq: The natural sequence extracted from the PDB (seq file)
+    :type prot_seq: str
+    
+    :arg prot_foldseek: The results from foldseek (foldseek file)
+    :type prot_foldseek: str
+
+    :arg msa_output_name: The natural sequence extracted from the PDB (msa file)
+    :type msa_output_name: str
+    """
+
+    msa_output_name = kwargs.pop('msa_output_name', 'prot_struc.msa')
+
+    def find_match_index(tar_nogap, nat_seq):
+        tar_nogap_str = ''.join(tar_nogap)
+        nat_seq_str = ''.join(nat_seq)
+        index = nat_seq_str.find(tar_nogap_str)
+        return index
+
+    # Read input files
+    with open(prot_seq, 'r') as f:
+        file1 = f.readlines()
+
+    with open(prot_foldseek, 'r') as f:
+        file2 = f.readlines()
+
+    # Open output file
+    with open(msa_output_name, 'w') as fp:
+        nat_seq = list(file1[0].strip())
+        
+        # Write the natural sequence to the output file
+        fp.write(''.join(nat_seq) + "\n")
+        
+        # Process each foldseek entry
+        for line in file2:
+            entries = line.split()
+            
+            if float(entries[2]) >= 0.5:
+                tar_seq = list(entries[11].strip())
+                mat_seq = list(entries[12].strip())
+                
+                tar_nogap = []
+                processed_mat = []
+                
+                for j in range(len(tar_seq)):
+                    if tar_seq[j] != '-':
+                        tar_nogap.append(tar_seq[j])
+                        processed_mat.append(mat_seq[j])
+                
+                match_index = find_match_index(tar_nogap, nat_seq)
+                end_index = match_index + len(tar_nogap)
+                m = 0
+                
+                for l in range(len(nat_seq)):
+                    if l < match_index:
+                        fp.write("-")
+                    elif l >= match_index and l < end_index:
+                        fp.write(processed_mat[m])
+                        m += 1
+                    else:
+                        fp.write("-")
+                fp.write("\n")
+
+
 def extractMultiModelPDB(multimodelPDB, **kwargs):
     """Extracts individual PDB models from multimodel PDB and places them into the pointed directory.
     If used for calculating calcSignatureInteractions align the models.

From ca717195fd747d34d44d8a751a60795f63fdef30 Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Sun, 10 Nov 2024 19:02:17 +0100
Subject: [PATCH 09/30] fix in kwargs, createFoldseekAlignment

---
 prody/proteins/interactions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index 1e7c4a106..6df421e9a 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -3240,7 +3240,7 @@ def showSminaTermValues(data):
     return show
 
 
-def createFoldseekAlignment(prot_seq, prot_foldseek, *kwargs):
+def createFoldseekAlignment(prot_seq, prot_foldseek, **kwargs):
     """Aligns sequences from prot_seq with homologous sequences identified in prot_foldseek, 
     generating a multiple sequence alignment.
     

From f736b8a43f143bc9972fb7af7fc9f22c49542d81 Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Sun, 10 Nov 2024 21:05:11 +0100
Subject: [PATCH 10/30] LOGGER info added to SignInt functions

---
 prody/proteins/interactions.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index 6df421e9a..c94f359c5 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -324,7 +324,7 @@ def remove_empty_strings(row):
 
 def log_message(message, level="INFO"):
     """Log a message with a specified log level."""
-    print("[{}] {}".format(level, message))
+    LOGGER.info("[{}] {}".format(level, message))
 
 def is_module_installed(module_name):
     """Check if a Python module is installed."""
@@ -3305,6 +3305,8 @@ def find_match_index(tar_nogap, nat_seq):
                     else:
                         fp.write("-")
                 fp.write("\n")
+    
+    LOGGER.info("MSA file is now created, and saved as {}.".format(msa_output_name))
 
 
 def extractMultiModelPDB(multimodelPDB, **kwargs):
@@ -3317,7 +3319,8 @@ def extractMultiModelPDB(multimodelPDB, **kwargs):
     :arg folder_name: The name of the folder to which PDBs will be extracted
     :type folder_name: str
     """
-
+    import os
+    
     folder_name = kwargs.pop('folder_name', 'struc_homologs')
     
     with open(multimodelPDB, 'r') as f:
@@ -3345,6 +3348,8 @@ def extractMultiModelPDB(multimodelPDB, **kwargs):
 
         if sig3 == 'ATOM' and fp:
             fp.write("{}\n".format(line))
+            
+    LOGGER.info("Individual models are saved in {}.".format(folder_name))
 
 
 def calcSignatureInteractions(mapping_file, PDB_folder, fixer='pdbfixer'):
@@ -3360,6 +3365,7 @@ def calcSignatureInteractions(mapping_file, PDB_folder, fixer='pdbfixer'):
     :arg fixer: The method for fixing lack of hydrogen bonds
     :type fixer: 'pdbfixer' or 'openbabel'
     """
+    import os
     
     functions = {
         "HydrogenBonds": calcHydrogenBonds,

From 01126525b9a6050a03a19df11e43df94228bae72 Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Sun, 10 Nov 2024 22:42:24 +0100
Subject: [PATCH 11/30] runFoldseek is added

---
 prody/proteins/interactions.py | 296 ++++++++++++++++++++++++++++++++-
 1 file changed, 295 insertions(+), 1 deletion(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index c94f359c5..fd433508a 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -51,7 +51,7 @@
            'Interactions', 'InteractionsTrajectory', 'LigandInteractionsTrajectory',
            'calcSminaBindingAffinity', 'calcSminaPerAtomInteractions', 'calcSminaTermValues',
            'showSminaTermValues', 'showPairEnergy', 'checkNonstandardResidues',
-           'saveInteractionsAsDummyAtoms', 'createFoldseekAlignment', 
+           'saveInteractionsAsDummyAtoms', 'createFoldseekAlignment', 'runFoldseek', 
            'extractMultiModelPDB', 'calcSignatureInteractions']
 
 
@@ -3352,6 +3352,300 @@ def extractMultiModelPDB(multimodelPDB, **kwargs):
     LOGGER.info("Individual models are saved in {}.".format(folder_name))
 
 
+def runFoldseek(pdb_file, chain, coverage_threshold, tm_threshold):
+    """This script processes a PDB file to extract a specified chain's sequence, searches for 
+    homologous structures using foldseek, and prepares alignment outputs for further analysis.
+    Before using the function, install Foldseek:
+    >>> conda install bioconda::foldseek
+    
+    :arg pdb_file: A PDB file path
+    :type pdb_file: str
+
+    :arg chain: Chain identifier
+    :type chain: str
+
+    :arg coverage_threshold: Coverage threshold 
+    :type coverage_threshold: int, float
+
+    :arg tm_threshold: TM-score threshold
+    :type tm_threshold: float
+    """
+    
+    import os
+    import subprocess
+    import re
+    import sys
+
+    # Define the amino acid conversion function
+    def aa_onelet(three_letter_code):
+        codes = {
+            'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C',
+            'GLN': 'Q', 'GLU': 'E', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I',
+            'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P',
+            'SER': 'S', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V',
+            'MSE': 'M'
+        }
+        return codes.get(three_letter_code)
+
+    # Function to extract the sequence from the PDB file
+    def extract_sequence_from_pdb(pdb_file, chain, output_file):
+        sequence = []
+        prev_resid = -9999
+
+        with open(pdb_file, 'r') as pdb:
+            for line in pdb:
+                if line.startswith("ATOM"):
+                    ch = line[21]
+                    resid = int(line[22:26].strip())
+                    aa = line[17:20].strip()
+
+                    if ch == chain and resid != prev_resid:
+                        one_aa = aa_onelet(aa)
+                        if one_aa:
+                            sequence.append(one_aa)
+                            prev_resid = resid
+
+        with open(output_file, 'w') as seq_file:
+            seq_file.write(''.join(sequence))
+
+    # Inputs
+    fpath = pdb_file.strip()
+    chain = chain.strip()
+    cov_threshold = float(coverage_threshold.strip())
+    tm_threshold = float(tm_threshold.strip())
+
+    # Filter PDB file
+    awk_command = "awk '{{if (substr($0, 22, 1) == \"{0}\") print}}'".format(chain)
+    subprocess.run("cat {0} | grep '^ATOM' | {1} > inp.pdb".format(fpath, awk_command), shell=True)
+
+    # Run foldseek and other commands
+    subprocess.run([
+        'foldseek', 'easy-search', 'inp.pdb', '../../../foldseek/pdb', 'prot.foldseek',
+        'tmp2', '--exhaustive-search', '1', '--format-output',
+        "query,target,qstart,qend,tstart,tend,qcov,tcov,qtmscore,ttmscore,rmsd,qaln,taln",
+        '-c', str(cov_threshold)
+    ])
+
+    # Extract sequence and write to prot.seq
+    extract_sequence_from_pdb('inp.pdb', chain, 'prot.seq')
+    createFoldseekAlignment('prot.seq', 'prot.foldseek', msa_output_name='prot_struc.msa')    
+    
+    # Read input files
+    with open('inp.pdb', 'r') as f:
+        file1 = f.readlines()
+
+    with open('prot.foldseek', 'r') as f:
+        file2 = f.readlines()
+
+    with open('prot_struc.msa', 'r') as f:
+        file3 = f.readlines()
+
+    # Open output files
+    fp1 = open("aligned_structures.pdb", 'w')
+    fp2 = open("shortlisted_resind.msa", 'w')
+    fp6 = open("seq_match_reconfirm.txt", 'w')
+    fp7 = open("aligned_structures_extended.pdb", 'w')
+    fp9 = open("shortlisted.foldseek", 'w')
+    fp10 = open("shortlisted.msa", 'w')
+
+    # Initialize variables
+    mod_count = 1
+    fp1.write("MODEL\t{0}\n".format(mod_count))
+    fp7.write("MODEL\t{0}\n".format(mod_count))
+
+    seq1 = list(file3[0].strip())
+    ind1 = 0
+    prev_id = -9999
+
+    # Process input PDB file
+    for line in file1:
+        line = line.strip()
+        id_ = line[0:4]
+        ch = line[21]
+        resid = int(line[22:26].strip())
+        aa = line[17:20]
+
+        if id_ == 'ATOM' and ch == chain and resid != prev_id:
+            prev_id = resid
+            one_aa = aa_onelet(aa)
+            if one_aa == seq1[ind1]:
+                fp1.write("{0}\n".format(line))
+                fp7.write("{0}\n".format(line))
+                fp2.write("{0} ".format(resid))
+                ind1 += 1
+            else:
+                print("Mismatch in sequence and structure of Query protein at Index {0}".format(ind1))
+                break
+        elif id_ == 'ATOM' and ch == chain and resid == prev_id:
+            fp1.write("{0}\n".format(line))
+            fp7.write("{0}\n".format(line))
+
+    fp1.write("TER\nENDMDL\n\n")
+    fp7.write("TER\nENDMDL\n\n")
+    fp2.write("\n")
+    fp10.write("{0}\n".format(file3[0].strip()))
+
+    # Processing foldseek results
+    os.makedirs("temp", exist_ok=True)
+
+    for i, entry in enumerate(file2):
+        entries = re.split(r'\s+', entry.strip())
+
+        if float(entries[8]) < tm_threshold:
+            continue
+
+        tstart = int(entries[4])
+        tend = int(entries[5])
+        pdb = entries[1][:4]
+        chain = entries[1][-1]
+        fname = "{0}.pdb".format(pdb)
+
+        # Download and process the target PDB file
+        subprocess.run(['wget', '-P', 'temp', "https://files.rcsb.org/download/{0}".format(fname)])
+
+        awk_command = "awk '{{if (substr($0, 22, 1) == \"{0}\") print}}'".format(chain)
+        subprocess.run("cat ./temp/{0} | grep -E '^(ATOM|HETATM)' | {1} > target.pdb".format(fname, awk_command), shell=True)
+
+        with open('target.pdb', 'r') as target_file:
+            file4 = target_file.readlines()
+
+        qseq = list(entries[11])
+        tseq = list(entries[12])
+
+        start_line = 0
+        prevind = -9999
+        tarind = 0
+
+        for j, line in enumerate(file4):
+            resid = int(line[22:26].strip())
+            if resid != prevind:
+                prevind = resid
+                tarind += 1
+            if tarind == tstart:
+                start_line = j
+                break
+
+        prevind = -9999
+        ind2 = 0
+        j = start_line
+        flag2 = False
+
+        with open("temp_coord.txt", 'w') as fp4, \
+             open("temp_resind.txt", 'w') as fp3, \
+             open("temp_seq.txt", 'w') as fp5:
+
+            for k in range(len(qseq)):
+                if tseq[k] != '-' and qseq[k] != '-':
+                    line = file4[j].strip()
+                    resid = int(line[22:26].strip())
+                    aa = line[17:20]
+                    one_aa = aa_onelet(aa)
+
+                    if one_aa == tseq[k]:
+                        fp3.write("{0} ".format(resid))
+                        fp5.write("{0}".format(one_aa))
+                        prevind = resid
+                    else:
+                        print("Mismatch in sequence and structure of Target protein {0}{1} at line {2} Index {3}-{4} ne {5}".format(
+                            pdb, chain, j, k, one_aa, tseq[k]))
+                        flag2 = True
+                        break
+
+                    while resid == prevind:
+                        fp4.write("{0}\n".format(line))
+                        j += 1
+                        if j >= len(file4):
+                            break
+                        line = file4[j].strip()
+                        resid = int(line[22:26].strip())
+                elif tseq[k] != '-' and qseq[k] == '-':
+                    line = file4[j].strip()
+                    resid = int(line[22:26].strip())
+                    aa = line[17:20]
+                    one_aa = aa_onelet(aa)
+
+                    if one_aa == tseq[k]:
+                        prevind = resid
+                    else:
+                        print("Mismatch in sequence and structure of Target protein {0}{1} at Line {2} Index {3}-{4} ne {5}".format(
+                            pdb, chain, j, k, one_aa, tseq[k]))
+                        flag2 = True
+                        break
+
+                    while resid == prevind:
+                        j += 1
+                        if j >= len(file4):
+                            break
+                        line = file4[j].strip()
+                        resid = int(line[22:26].strip())
+
+                elif tseq[k] == '-' and qseq[k] != '-':
+                    fp3.write("- ")
+                    fp5.write("-")
+
+            if flag2:
+                continue
+
+        with open("temp_coord.txt", 'r') as f:
+            tmpcord = f.readlines()
+
+        with open("temp_resind.txt", 'r') as f:
+            tmpresind = f.readlines()
+
+        with open("temp_seq.txt", 'r') as f:
+            tmpseq = f.readlines()
+
+        ind3 = i + 1
+        seq1 = list(file3[ind3].strip())
+        seq2 = tmpresind[0].strip().split()
+
+        fp9.write("{0}".format(file2[i]))
+        fp10.write("{0}\n".format(file3[ind3].strip()))
+
+        for m in range(len(seq1)):
+            if seq1[m] == '-':
+                fp2.write("{0} ".format(seq1[m]))
+            else:
+                break
+
+        for n in range(len(seq2)):
+            fp2.write("{0} ".format(seq2[n]))
+            fp6.write("{0}".format(seq1[m]))
+            m += 1
+
+        for o in range(m, len(seq1)):
+            fp2.write("{0} ".format(seq1[m]))
+
+        fp2.write("\n")
+        fp6.write("\n{0}\n\n\n".format(tmpseq[0]))
+
+        mod_count += 1
+        fp1.write("MODEL\t{0}\n".format(mod_count))
+        fp7.write("MODEL\t{0}\n".format(mod_count))
+
+        for line in file4:
+            if line.strip():
+                fp7.write("{0}\n".format(line.strip()))
+
+        for line in tmpcord:
+            fp1.write("{0}".format(line))
+
+        fp1.write("TER\nENDMDL\n\n")
+        fp7.write("TER\nENDMDL\n\n")
+
+    # Cleanup
+    fp1.close()
+    fp2.close()
+    fp6.close()
+    fp7.close()
+    fp9.close()
+    fp10.close()
+
+    extractMultiModelPDB('aligned_structures.pdb', folder_name='struc_homologs')
+    subprocess.run("rm -f inp.pdb prot.seq target.pdb temp_coord.txt temp_resind.txt temp_seq.txt", shell=True)
+    subprocess.run("rm -rf tmp2 temp", shell=True)
+
+
 def calcSignatureInteractions(mapping_file, PDB_folder, fixer='pdbfixer'):
     """Analyzes protein structures to identify various interactions using InSty. 
     Processes data from the MSA file and folder with selected models.

From f0565f2ed6bfcbff0ceb9f9186df085ad6f3ef56 Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Mon, 11 Nov 2024 19:22:03 +0100
Subject: [PATCH 12/30] LOGGER changes

---
 prody/proteins/interactions.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index fd433508a..44c1dd82c 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -3352,7 +3352,7 @@ def extractMultiModelPDB(multimodelPDB, **kwargs):
     LOGGER.info("Individual models are saved in {}.".format(folder_name))
 
 
-def runFoldseek(pdb_file, chain, coverage_threshold, tm_threshold):
+def runFoldseek(pdb_file, chain, coverage_threshold, tm_threshold, **kwargs):
     """This script processes a PDB file to extract a specified chain's sequence, searches for 
     homologous structures using foldseek, and prepares alignment outputs for further analysis.
     Before using the function, install Foldseek:
@@ -3365,10 +3365,13 @@ def runFoldseek(pdb_file, chain, coverage_threshold, tm_threshold):
     :type chain: str
 
     :arg coverage_threshold: Coverage threshold 
-    :type coverage_threshold: int, float
+    :type coverage_threshold: float
 
     :arg tm_threshold: TM-score threshold
     :type tm_threshold: float
+    
+    :arg database_folder: Folder with the database
+    :type database_folder: str
     """
     
     import os
@@ -3376,6 +3379,8 @@ def runFoldseek(pdb_file, chain, coverage_threshold, tm_threshold):
     import re
     import sys
 
+    database_folder = kwargs.pop('database_folder', '../../../foldseek/pdb')
+
     # Define the amino acid conversion function
     def aa_onelet(three_letter_code):
         codes = {
@@ -3413,6 +3418,8 @@ def extract_sequence_from_pdb(pdb_file, chain, output_file):
     chain = chain.strip()
     cov_threshold = float(coverage_threshold.strip())
     tm_threshold = float(tm_threshold.strip())
+    
+    print(cov_threshold, type(cov_threshold))
 
     # Filter PDB file
     awk_command = "awk '{{if (substr($0, 22, 1) == \"{0}\") print}}'".format(chain)
@@ -3420,12 +3427,12 @@ def extract_sequence_from_pdb(pdb_file, chain, output_file):
 
     # Run foldseek and other commands
     subprocess.run([
-        'foldseek', 'easy-search', 'inp.pdb', '../../../foldseek/pdb', 'prot.foldseek',
+        'foldseek', 'easy-search', 'inp.pdb', database_folder, 'prot.foldseek',
         'tmp2', '--exhaustive-search', '1', '--format-output',
         "query,target,qstart,qend,tstart,tend,qcov,tcov,qtmscore,ttmscore,rmsd,qaln,taln",
         '-c', str(cov_threshold)
     ])
-
+    
     # Extract sequence and write to prot.seq
     extract_sequence_from_pdb('inp.pdb', chain, 'prot.seq')
     createFoldseekAlignment('prot.seq', 'prot.foldseek', msa_output_name='prot_struc.msa')    
@@ -3644,7 +3651,7 @@ def extract_sequence_from_pdb(pdb_file, chain, output_file):
     extractMultiModelPDB('aligned_structures.pdb', folder_name='struc_homologs')
     subprocess.run("rm -f inp.pdb prot.seq target.pdb temp_coord.txt temp_resind.txt temp_seq.txt", shell=True)
     subprocess.run("rm -rf tmp2 temp", shell=True)
-
+    
 
 def calcSignatureInteractions(mapping_file, PDB_folder, fixer='pdbfixer'):
     """Analyzes protein structures to identify various interactions using InSty. 

From ba342f928234037c974012194aa2bc1afb479494 Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Mon, 11 Nov 2024 19:27:47 +0100
Subject: [PATCH 13/30] cov-mode parameter to foldseek run

---
 prody/proteins/interactions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index 44c1dd82c..df6891306 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -3430,7 +3430,7 @@ def extract_sequence_from_pdb(pdb_file, chain, output_file):
         'foldseek', 'easy-search', 'inp.pdb', database_folder, 'prot.foldseek',
         'tmp2', '--exhaustive-search', '1', '--format-output',
         "query,target,qstart,qend,tstart,tend,qcov,tcov,qtmscore,ttmscore,rmsd,qaln,taln",
-        '-c', str(cov_threshold)
+        '-c', str(cov_threshold), '--cov-mode', '0'
     ])
     
     # Extract sequence and write to prot.seq

From 05b4621ab74d0f518e43fb08f283ae8a9c2ffac5 Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Mon, 11 Nov 2024 22:05:08 +0100
Subject: [PATCH 14/30] runFoldseek improvements with kwargs, docs

---
 prody/proteins/interactions.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index df6891306..d4a28930b 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -3352,11 +3352,16 @@ def extractMultiModelPDB(multimodelPDB, **kwargs):
     LOGGER.info("Individual models are saved in {}.".format(folder_name))
 
 
-def runFoldseek(pdb_file, chain, coverage_threshold, tm_threshold, **kwargs):
+def runFoldseek(pdb_file, chain, **kwargs):
     """This script processes a PDB file to extract a specified chain's sequence, searches for 
     homologous structures using foldseek, and prepares alignment outputs for further analysis.
+    
     Before using the function, install Foldseek:
     >>> conda install bioconda::foldseek
+    More information can be found:
+    https://github.com/steineggerlab/foldseek?tab=readme-ov-file#databasesand
+
+    This function will not work under Windows.
     
     :arg pdb_file: A PDB file path
     :type pdb_file: str
@@ -3365,12 +3370,16 @@ def runFoldseek(pdb_file, chain, coverage_threshold, tm_threshold, **kwargs):
     :type chain: str
 
     :arg coverage_threshold: Coverage threshold 
+            by default 0.3
     :type coverage_threshold: float
 
     :arg tm_threshold: TM-score threshold
+            by default 0.5
     :type tm_threshold: float
     
     :arg database_folder: Folder with the database
+            by default '~/Donwload/foldseek/pdb'
+            To download the database use: 'foldseek databases PDB pdb tmp' in the console 
     :type database_folder: str
     """
     
@@ -3379,8 +3388,10 @@ def runFoldseek(pdb_file, chain, coverage_threshold, tm_threshold, **kwargs):
     import re
     import sys
 
-    database_folder = kwargs.pop('database_folder', '../../../foldseek/pdb')
-
+    database_folder = kwargs.pop('database_folder', '~/Donwload/foldseek/pdb')
+    coverage_threshold = kwargs.pop('coverage_threshold', 0.3)
+    tm_threshold = kwargs.pop('tm_threshold', 0.5)    
+    
     # Define the amino acid conversion function
     def aa_onelet(three_letter_code):
         codes = {
@@ -3416,11 +3427,9 @@ def extract_sequence_from_pdb(pdb_file, chain, output_file):
     # Inputs
     fpath = pdb_file.strip()
     chain = chain.strip()
-    cov_threshold = float(coverage_threshold.strip())
-    tm_threshold = float(tm_threshold.strip())
+    cov_threshold = float(coverage_threshold)
+    tm_threshold = float(tm_threshold)
     
-    print(cov_threshold, type(cov_threshold))
-
     # Filter PDB file
     awk_command = "awk '{{if (substr($0, 22, 1) == \"{0}\") print}}'".format(chain)
     subprocess.run("cat {0} | grep '^ATOM' | {1} > inp.pdb".format(fpath, awk_command), shell=True)

From d31d183d8ab532f258f919df6560c50eeff17ffe Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Mon, 11 Nov 2024 22:31:44 +0100
Subject: [PATCH 15/30] changes in imports

---
 prody/proteins/interactions.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index d4a28930b..ee2b55f9a 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -32,8 +32,6 @@
 from prody.ensemble import Ensemble
 
 import multiprocessing
-import matplotlib.pylab as plt
-import os
 from .fixer import *
 
 
@@ -314,10 +312,6 @@ def showPairEnergy(data, **kwargs):
 
 
 # SignatureInteractions supporting functions
-from Bio.PDB.Polypeptide import three_to_one
-import shutil
-import importlib.util
-
 def remove_empty_strings(row):
     """Remove empty strings from a list."""
     return [elem for elem in row if elem != '']
@@ -328,15 +322,18 @@ def log_message(message, level="INFO"):
 
 def is_module_installed(module_name):
     """Check if a Python module is installed."""
+    import importlib.util
     spec = importlib.util.find_spec(module_name)
     return spec is not None
 
 def is_command_installed(command):
     """Check if a command-line tool is installed."""
+    import shutil
     return shutil.which(command) is not None
 
 def load_residues_from_pdb(pdb_file):
     """Extract residue numbers and their corresponding one-letter amino acid codes from a PDB file."""
+    from Bio.PDB.Polypeptide import three_to_one
     structure = parsePDB(pdb_file)
     residues = structure.iterResidues()
     residue_dict = {}
@@ -373,6 +370,7 @@ def process_data(mapping_file, pdb_folder, interaction_func, bond_type, fixer):
     mapping_num = filtered_mapping.astype(float)
 
     # Load the one-letter amino acid codes from model1.pdb
+    import os 
     pdb_model_path = os.path.join(pdb_folder, 'model1.pdb')
     residue_dict = load_residues_from_pdb(pdb_model_path)
 
@@ -488,6 +486,7 @@ def process_data(mapping_file, pdb_folder, interaction_func, bond_type, fixer):
 
 def plot_barh(result, bond_type, n_per_plot=None, min_height=8):
     """Plot horizontal bar plots of percentages, splitting the data into fixed-sized plots."""
+    import matplotlib.pylab as plt
     plt.rcParams.update({'font.size': 20})
 
     # Set default value for n_per_plot if None is passed
@@ -3675,6 +3674,7 @@ def calcSignatureInteractions(mapping_file, PDB_folder, fixer='pdbfixer'):
     :arg fixer: The method for fixing lack of hydrogen bonds
     :type fixer: 'pdbfixer' or 'openbabel'
     """
+    
     import os
     
     functions = {

From 9b79bd7b458ab1a0368efe967a4aba29936d3412 Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Tue, 12 Nov 2024 21:36:51 +0100
Subject: [PATCH 16/30] kwargs for plot_barh

---
 prody/proteins/interactions.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index ee2b55f9a..b81a35c1e 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -484,12 +484,24 @@ def process_data(mapping_file, pdb_folder, interaction_func, bond_type, fixer):
     return output_data, fixed_files  # Return fixed_files to remove later
 
 
-def plot_barh(result, bond_type, n_per_plot=None, min_height=8):
-    """Plot horizontal bar plots of percentages, splitting the data into fixed-sized plots."""
+def plot_barh(result, bond_type, **kwargs):
+    """Plot horizontal bar plots of percentages of interactions, splitting the data into fixed-sized plots.
+
+    :arg n_per_plot: The number of results per one plot
+        by default is 20 if set to None 
+    :type n_per_plot: int
+    
+    :arg min_height: Size of the bar plot
+        by default is 8
+    :type min_height: int
+    """
+
     import matplotlib.pylab as plt
     plt.rcParams.update({'font.size': 20})
 
-    # Set default value for n_per_plot if None is passed
+    n_per_plot = kwargs.pop('n_per_plot', None)
+    min_height = kwargs.pop('min_height', 8)
+    
     if n_per_plot is None:
         n_per_plot = 20
 

From 6e342c44c40c30282e3ec28098539fbd57432c7b Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Tue, 12 Nov 2024 21:45:33 +0100
Subject: [PATCH 17/30] fixing typos

---
 prody/proteins/interactions.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index b81a35c1e..c00da0121 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -3330,6 +3330,7 @@ def extractMultiModelPDB(multimodelPDB, **kwargs):
     :arg folder_name: The name of the folder to which PDBs will be extracted
     :type folder_name: str
     """
+    
     import os
     
     folder_name = kwargs.pop('folder_name', 'struc_homologs')
@@ -3389,7 +3390,7 @@ def runFoldseek(pdb_file, chain, **kwargs):
     :type tm_threshold: float
     
     :arg database_folder: Folder with the database
-            by default '~/Donwload/foldseek/pdb'
+            by default '~/Downloads/foldseek/pdb'
             To download the database use: 'foldseek databases PDB pdb tmp' in the console 
     :type database_folder: str
     """
@@ -3399,7 +3400,7 @@ def runFoldseek(pdb_file, chain, **kwargs):
     import re
     import sys
 
-    database_folder = kwargs.pop('database_folder', '~/Donwload/foldseek/pdb')
+    database_folder = kwargs.pop('database_folder', '~/Downloads/foldseek/pdb')
     coverage_threshold = kwargs.pop('coverage_threshold', 0.3)
     tm_threshold = kwargs.pop('tm_threshold', 0.5)    
     

From 5450f8caa2ff14a62a1bf18184aecba8c185f53d Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Tue, 12 Nov 2024 23:25:09 +0100
Subject: [PATCH 18/30] kwargs for calcSignatureInteractions, optional removal
 of tmp files

---
 prody/proteins/interactions.py | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index c00da0121..fd6124a3d 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -3674,7 +3674,7 @@ def extract_sequence_from_pdb(pdb_file, chain, output_file):
     subprocess.run("rm -rf tmp2 temp", shell=True)
     
 
-def calcSignatureInteractions(mapping_file, PDB_folder, fixer='pdbfixer'):
+def calcSignatureInteractions(mapping_file, PDB_folder, **kwargs):
     """Analyzes protein structures to identify various interactions using InSty. 
     Processes data from the MSA file and folder with selected models.
     
@@ -3685,11 +3685,21 @@ def calcSignatureInteractions(mapping_file, PDB_folder, fixer='pdbfixer'):
     :type PDB_folder: str
 
     :arg fixer: The method for fixing lack of hydrogen bonds
+            by default is 'pdbfixer'
     :type fixer: 'pdbfixer' or 'openbabel'
+    
+    :arg remove_tmp_files: Removing intermediate files that are created in the process
+            by default is True
+    :type remove_tmp_files: True or False
     """
     
     import os
     
+    fixer = kwargs.pop('fixer', 'pdbfixer')
+    remove_tmp_files = kwargs.pop('remove_tmp_files', True)
+    n_per_plot = kwargs.pop('n_per_plot', None)
+    min_height = kwargs.pop('min_height', 8)
+    
     functions = {
         "HydrogenBonds": calcHydrogenBonds,
         "SaltBridges": calcSaltBridges,
@@ -3719,15 +3729,15 @@ def calcSignatureInteractions(mapping_file, PDB_folder, fixer='pdbfixer'):
         result, fixed_files = result
 
         # Proceed with plotting
-        n = None
-        plot_barh(result, bond_type, n)
+        plot_barh(result, bond_type, n_per_plot=n_per_plot, min_height=min_height)
 
     # Remove all fixed files at the end
-    if 'fixed_files' in locals():
-        for fixed_file in fixed_files:
-            if os.path.exists(fixed_file):
-                os.remove(fixed_file)
-                log_message("Removed fixed file: {}".format(fixed_file))
+    if remove_tmp_files == True:
+        if 'fixed_files' in locals():
+            for fixed_file in fixed_files:
+                if os.path.exists(fixed_file):
+                    os.remove(fixed_file)
+                    log_message("Removed fixed file: {}".format(fixed_file))
 
 
 

From 1ec47c5993d05c2be42e3977a1ea1f8143a35539 Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Wed, 13 Nov 2024 13:41:13 +0100
Subject: [PATCH 19/30] if for 'fixed_files' removed

---
 prody/proteins/interactions.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index fd6124a3d..c80140689 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -3733,11 +3733,10 @@ def calcSignatureInteractions(mapping_file, PDB_folder, **kwargs):
 
     # Remove all fixed files at the end
     if remove_tmp_files == True:
-        if 'fixed_files' in locals():
-            for fixed_file in fixed_files:
-                if os.path.exists(fixed_file):
-                    os.remove(fixed_file)
-                    log_message("Removed fixed file: {}".format(fixed_file))
+        for fixed_file in fixed_files:
+            if os.path.exists(fixed_file):
+                os.remove(fixed_file)
+                log_message("Removed fixed file: {}".format(fixed_file))
 
 
 

From 6d348784b93d482231a3dd43662ab2f549429672 Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Wed, 13 Nov 2024 14:35:53 +0100
Subject: [PATCH 20/30] 3to1 letter code is not replaced by AAMAP

---
 prody/proteins/interactions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index c80140689..e5d2050b8 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -333,7 +333,7 @@ def is_command_installed(command):
 
 def load_residues_from_pdb(pdb_file):
     """Extract residue numbers and their corresponding one-letter amino acid codes from a PDB file."""
-    from Bio.PDB.Polypeptide import three_to_one
+    from prody.atomic.atomic import AAMAP
     structure = parsePDB(pdb_file)
     residues = structure.iterResidues()
     residue_dict = {}
@@ -342,7 +342,7 @@ def load_residues_from_pdb(pdb_file):
         resnum = res.getResnum()
         resname = res.getResname()  # Three-letter amino acid code
         try:
-            one_letter_code = three_to_one(resname)  # Convert to one-letter code
+            one_letter_code = AAMAP[resname]
             residue_dict[resnum] = one_letter_code
         except KeyError:
             log_message("Unknown residue: {} at position {}".format(resname, resnum), "WARNING")

From ee64b51fb957f0f3109cf6aacf63869a6f979c2d Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Wed, 13 Nov 2024 14:59:56 +0100
Subject: [PATCH 21/30] Checks for database_folder exists

---
 prody/proteins/interactions.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index e5d2050b8..9cc43962a 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -3404,6 +3404,10 @@ def runFoldseek(pdb_file, chain, **kwargs):
     coverage_threshold = kwargs.pop('coverage_threshold', 0.3)
     tm_threshold = kwargs.pop('tm_threshold', 0.5)    
     
+    full_path = os.path.expanduser(database_folder)
+    if not os.path.exists(full_path.strip('pdb')):
+        raise ValueError('The required database is not found in {0}. Please download it first.'.format(database_folder.strip('pdb')))
+    
     # Define the amino acid conversion function
     def aa_onelet(three_letter_code):
         codes = {

From 8c19c0fbb067d8ed96625e9f56adacdd58b65c94 Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Wed, 13 Nov 2024 15:13:34 +0100
Subject: [PATCH 22/30] additional checks and docs improvements

---
 prody/proteins/interactions.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index 9cc43962a..629898632 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -3374,6 +3374,9 @@ def runFoldseek(pdb_file, chain, **kwargs):
     https://github.com/steineggerlab/foldseek?tab=readme-ov-file#databasesand
 
     This function will not work under Windows.
+    Example usage: runFoldseek('5kqm.pdb', 'A', database_folder='~/Downloads/foldseek/pdb')
+    where previous a folder called 'foldseek' were created and PDB database was uploaded using:
+    >>> foldseek databases PDB pdb tmp   (Linux console) 
     
     :arg pdb_file: A PDB file path
     :type pdb_file: str
@@ -3404,6 +3407,9 @@ def runFoldseek(pdb_file, chain, **kwargs):
     coverage_threshold = kwargs.pop('coverage_threshold', 0.3)
     tm_threshold = kwargs.pop('tm_threshold', 0.5)    
     
+    if not isinstance(pdb_file, str):
+        raise TypeError('Please provide the name of the PDB file.')
+    
     full_path = os.path.expanduser(database_folder)
     if not os.path.exists(full_path.strip('pdb')):
         raise ValueError('The required database is not found in {0}. Please download it first.'.format(database_folder.strip('pdb')))
@@ -3682,6 +3688,8 @@ def calcSignatureInteractions(mapping_file, PDB_folder, **kwargs):
     """Analyzes protein structures to identify various interactions using InSty. 
     Processes data from the MSA file and folder with selected models.
     
+    Example usage: calcSignatureInteractions('shortlisted_resind.msa','./struc_homologs')
+    
     :arg mapping_file: Aligned residue indices, MSA file type
     :type mapping_file: str
 

From 22ff338198a530dff27720475421ad96628b26a7 Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Sat, 16 Nov 2024 13:55:17 +0100
Subject: [PATCH 23/30] runDali added

---
 prody/proteins/interactions.py | 101 ++++++++++++++++++++++++++++++++-
 1 file changed, 100 insertions(+), 1 deletion(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index 629898632..f799f3398 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -33,6 +33,8 @@
 
 import multiprocessing
 from .fixer import *
+from .compare import *
+from prody.measure import calcTransformation, calcDistance, calcRMSD, superpose
 
 
 __all__ = ['calcHydrogenBonds', 'calcChHydrogenBonds', 'calcSaltBridges',
@@ -49,7 +51,7 @@
            'Interactions', 'InteractionsTrajectory', 'LigandInteractionsTrajectory',
            'calcSminaBindingAffinity', 'calcSminaPerAtomInteractions', 'calcSminaTermValues',
            'showSminaTermValues', 'showPairEnergy', 'checkNonstandardResidues',
-           'saveInteractionsAsDummyAtoms', 'createFoldseekAlignment', 'runFoldseek', 
+           'saveInteractionsAsDummyAtoms', 'createFoldseekAlignment', 'runFoldseek', 'runDali', 
            'extractMultiModelPDB', 'calcSignatureInteractions']
 
 
@@ -3683,6 +3685,103 @@ def extract_sequence_from_pdb(pdb_file, chain, output_file):
     subprocess.run("rm -f inp.pdb prot.seq target.pdb temp_coord.txt temp_resind.txt temp_seq.txt", shell=True)
     subprocess.run("rm -rf tmp2 temp", shell=True)
     
+    
+def runDali(pdb, chain, **kwargs):
+    """This function calls searchDali() and downloads all the PDB files, separate by chains, add hydrogens and missing side chains,
+    and finally align them and put into the newly created folder.
+       
+    :arg pdb: A PDB code
+    :type pdb: str
+
+    :arg chain: chain identifier
+    :type chain: str
+
+    :arg cutoff_len: Length of aligned residues < cutoff_len (must be an integer or a float between 0 and 1)
+            See searchDali for more details 
+            by default 0.5
+    :type cutoff_len: float
+
+    :arg cutoff_rmsd: RMSD cutoff (see searchDali)
+            by default 1.0
+    :type cutoff_rmsd: float
+    
+    :arg subsetDali: fullPDB, PDB25, PDB50, PDB90
+            by default is 'fullPDB'
+    :type subsetDali: str    
+    
+    :arg fixer: The method for fixing lack of hydrogen bonds
+            by default is 'pdbfixer'
+    :type fixer: 'pdbfixer' or 'openbabel'
+    
+    :arg subset: subsets of atoms: 'ca', 'bb', 'heavy', 'noh', 'all'  (see matchChains())
+            by default is 'ca'
+    :type subset: str
+
+    :arg seqid: Minimum value of the sequence identity (see matchChains())
+            by default 5
+    :type seqid: float
+    
+    :arg overlap: percent overlap (see matchChains())
+            by default 50
+    :type overlap: float
+
+    :arg folder_name: Folder where the results will be collected
+            by default is 'struc_homologs'
+    :type folder_name: str
+    """
+
+    import os
+    import shutil
+    from prody.database import dali 
+    
+    cutoff_len = kwargs.pop('cutoff_len', 0.5)
+    cutoff_rmsd = kwargs.pop('cutoff_rmsd', 1.0)
+    fixer = kwargs.pop('fixer', 'pdbfixer')
+    subset_Dali = kwargs.pop('subset_Dali', 'fullPDB')
+    subset = kwargs.pop('subset', 'ca')
+    seqid = kwargs.pop('seqid', 5)
+    overlap = kwargs.pop('overlap', 50)
+    folder_name = kwargs.pop('folder_name', 'struc_homologs')
+    
+    dali_rec = dali.searchDali(pdb, chain, subset=subset_Dali)
+
+    while not dali_rec.isSuccess:
+        dali_rec.fetch()
+    
+    pdb_ids = dali_rec.filter(cutoff_len=cutoff_len, cutoff_rmsd=cutoff_rmsd)
+    pdb_hits = [ (i[:4], i[4:]) for i in pdb_ids ]
+    
+    list_pdbs = []
+    LOGGER.info('Separating chains and saving into PDB file')
+    for i in pdb_hits:
+        LOGGER.info('PDB code {} and chain {}'.format(i[0], i[1]))
+        p = parsePDB(i[0]).select('chain '+i[1]+' and protein')
+        writePDB(i[0]+i[1]+'.pdb', p)
+        list_pdbs.append(i[0]+i[1]+'.pdb')
+    
+    #LOGGER.info('Number of selected structures: ', len(list_pdbs))
+    LOGGER.info('Adding hydrogens to the structures..')
+    new_pdbids = fixStructuresMissingAtoms(list_pdbs, method='pdbfixer', model_residues=True, overwrite=True)
+
+    os.makedirs(folder_name)
+    structures = parsePDB(new_pdbids)
+    target = structures[0]
+    rmsds = []
+    for mobile in structures[1:]:
+        try:
+            LOGGER.info('Aligning the structures..')
+            i = mobile.getTitle()
+            LOGGER.info(i)
+            matches = matchChains(mobile.protein, target.protein, subset=subset, seqid=seqid, overlap=overlap)
+            m = matches[0]
+            m0_alg, T = superpose(m[0], m[1], weights=m[0].getFlags("mapped"))
+            rmsds.append(calcRMSD(m[0], m[1], weights=m[0].getFlags("mapped")))
+            source_file = 'align__'+i+'.pdb'
+            writePDB(source_file, mobile)
+            shutil.move(source_file, os.path.join(folder_name, os.path.basename(source_file)))
+        except TypeError:
+            raise TypeError('There is a problem with {}. Change seqid or overlap parameter to include the structure.'.format(i))
+    
 
 def calcSignatureInteractions(mapping_file, PDB_folder, **kwargs):
     """Analyzes protein structures to identify various interactions using InSty. 

From 8a0019d491529c9fc1381cb0baa0f4e03da1fee1 Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Sat, 16 Nov 2024 18:22:43 +0100
Subject: [PATCH 24/30] TypeError replaced by LOGGER.warn in runDali

---
 prody/proteins/interactions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index 7ef226354..afb053622 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -3781,8 +3781,8 @@ def runDali(pdb, chain, **kwargs):
             source_file = 'align__'+i+'.pdb'
             writePDB(source_file, mobile)
             shutil.move(source_file, os.path.join(folder_name, os.path.basename(source_file)))
-        except TypeError:
-            raise TypeError('There is a problem with {}. Change seqid or overlap parameter to include the structure.'.format(i))
+        except:
+            LOGGER.warn('There is a problem with {}. Change seqid or overlap parameter to include the structure.'.format(i))
     
 
 def calcSignatureInteractions(mapping_file, PDB_folder, **kwargs):

From f57418e1f14c3529f5c6905f9c2faf3cd87b52e2 Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Tue, 19 Nov 2024 08:31:29 +0100
Subject: [PATCH 25/30] runBLAST form SignInter added

---
 prody/proteins/interactions.py | 87 +++++++++++++++++++++++++++++++++-
 1 file changed, 86 insertions(+), 1 deletion(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index afb053622..6ae348761 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -52,7 +52,7 @@
            'calcSminaBindingAffinity', 'calcSminaPerAtomInteractions', 'calcSminaTermValues',
            'showSminaTermValues', 'showPairEnergy', 'checkNonstandardResidues',
            'saveInteractionsAsDummyAtoms', 'createFoldseekAlignment', 'runFoldseek', 'runDali', 
-           'extractMultiModelPDB', 'calcSignatureInteractions']
+           'runBLAST', 'extractMultiModelPDB', 'calcSignatureInteractions']
 
 
 def cleanNumbers(listContacts):
@@ -3783,7 +3783,92 @@ def runDali(pdb, chain, **kwargs):
             shutil.move(source_file, os.path.join(folder_name, os.path.basename(source_file)))
         except:
             LOGGER.warn('There is a problem with {}. Change seqid or overlap parameter to include the structure.'.format(i))
+
+
+def runBLAST(pdb, chain, **kwargs):
+    """This function calls blastPDB to find homologs and downloads all of them in PDB format to the local directory,
+    separate chains that were identified by BLAST, add hydrogens and missing side chains,
+    and finally align them and put into the newly created folder.
+       
+    :arg pdb: A PDB code
+    :type pdb: str
+
+    :arg chain: chain identifier
+    :type chain: str
+
+    :arg fixer: The method for fixing lack of hydrogen bonds
+            by default is 'pdbfixer'
+    :type fixer: 'pdbfixer' or 'openbabel'
+    
+    :arg subset: subsets of atoms: 'ca', 'bb', 'heavy', 'noh', 'all'  (see matchChains())
+            by default is 'bb'
+    :type subset: str
+
+    :arg seqid: Minimum value of the sequence identity (see matchChains())
+            by default 90
+    :type seqid: float
+    
+    :arg overlap: percent overlap (see matchChains())
+            by default 50
+    :type overlap: float
+
+    :arg folder_name: Folder where the results will be collected
+            by default is 'struc_homologs'
+    :type folder_name: str
+    """
+    
+    import os
+    import shutil
+    from prody.proteins.blastpdb import blastPDB
+    
+    fixer = kwargs.pop('fixer', 'pdbfixer')
+    seqid = kwargs.pop('seqid', 90)
+    overlap = kwargs.pop('overlap', 50)
+    subset = kwargs.pop('subset', 'bb')
+    folder_name = kwargs.pop('folder_name', 'struc_homologs')
+
+    ref_prot = parsePDB(pdb)
+    ref_hv = ref_prot.getHierView()[chain]
+    sequence = ref_hv.getSequence()
+
+    blast_record = blastPDB(sequence)
+    while not blast_record.isSuccess:
+        blast_record.fetch()
+
+    pdb_hits = []
+    for key, item in blast_record.getHits(seqid).items():
+        pdb_hits.append((key, item['chain_id']))
+
+    list_pdbs = []
+    LOGGER.info('Separating chains and saving into PDB file')
+    for i in pdb_hits:
+        LOGGER.info('PDB code {} and chain {}'.format(i[0], i[1]))
+        p = parsePDB(i[0]).select('chain '+i[1]+' and protein')
+        writePDB(i[0]+i[1]+'.pdb', p)
+        list_pdbs.append(i[0]+i[1]+'.pdb')
     
+    LOGGER.info('Adding hydrogens to the structures..')
+    new_pdbids = fixStructuresMissingAtoms(list_pdbs, method='pdbfixer', model_residues=True, overwrite=True)
+
+    os.makedirs(folder_name)
+    structures = parsePDB(new_pdbids)
+    target = structures[0]
+    rmsds = []
+    for mobile in structures[1:]:
+        try:
+            LOGGER.info('Aligning the structures..')
+            i = mobile.getTitle()
+            LOGGER.info(i)
+            matches = matchChains(mobile.protein, target.protein, subset=subset, seqid=seqid, overlap=overlap)
+            m = matches[0]
+            m0_alg, T = superpose(m[0], m[1], weights=m[0].getFlags("mapped"))
+            rmsds.append(calcRMSD(m[0], m[1], weights=m[0].getFlags("mapped")))
+            source_file = 'align__'+i+'.pdb'
+            writePDB(source_file, mobile)
+            shutil.move(source_file, os.path.join(folder_name, os.path.basename(source_file)))
+        except:
+            LOGGER.warn('There is a problem with {}. Change seqid or overlap parameter to include the structure.'.format(i))
+
 
 def calcSignatureInteractions(mapping_file, PDB_folder, **kwargs):
     """Analyzes protein structures to identify various interactions using InSty. 

From 50a37d6e39cbc3ee98206456c520e3f76923aefe Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Fri, 22 Nov 2024 22:47:25 +0100
Subject: [PATCH 26/30] calcSignatureInteractions includes now analysis with
 MSA file [further reorganization needed]

---
 prody/proteins/interactions.py | 112 ++++++++++++++++++++-------------
 1 file changed, 70 insertions(+), 42 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index 6ae348761..0e0c74a67 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -3870,18 +3870,21 @@ def runBLAST(pdb, chain, **kwargs):
             LOGGER.warn('There is a problem with {}. Change seqid or overlap parameter to include the structure.'.format(i))
 
 
-def calcSignatureInteractions(mapping_file, PDB_folder, **kwargs):
+def calcSignatureInteractions(PDB_folder, **kwargs):
     """Analyzes protein structures to identify various interactions using InSty. 
     Processes data from the MSA file and folder with selected models.
     
-    Example usage: calcSignatureInteractions('shortlisted_resind.msa','./struc_homologs')
-    
-    :arg mapping_file: Aligned residue indices, MSA file type
-    :type mapping_file: str
+    Example usage: 
+    >>> calcSignatureInteractions('./struc_homologs')
+    >>> calcSignatureInteractions('./struc_homologs', mapping_file='shortlisted_resind.msa')
 
     :arg PDB_folder: Directory containing PDB model files
     :type PDB_folder: str
 
+    :arg mapping_file: Aligned residue indices, MSA file type
+            required in Foldseek analyisis
+    :type mapping_file: str
+
     :arg fixer: The method for fixing lack of hydrogen bonds
             by default is 'pdbfixer'
     :type fixer: 'pdbfixer' or 'openbabel'
@@ -3892,50 +3895,75 @@ def calcSignatureInteractions(mapping_file, PDB_folder, **kwargs):
     """
     
     import os
-    
-    fixer = kwargs.pop('fixer', 'pdbfixer')
-    remove_tmp_files = kwargs.pop('remove_tmp_files', True)
-    n_per_plot = kwargs.pop('n_per_plot', None)
-    min_height = kwargs.pop('min_height', 8)
-    
-    functions = {
-        "HydrogenBonds": calcHydrogenBonds,
-        "SaltBridges": calcSaltBridges,
-        "RepulsiveIonicBonding": calcRepulsiveIonicBonding,
-        "PiStacking": calcPiStacking,
-        "PiCation": calcPiCation,
-        "Hydrophobic": calcHydrophobic,
-        "DisulfideBonds": calcDisulfideBonds
-    }
+    mapping_file = kwargs.get('mapping_file')
+    
+    if not mapping_file:
+        # Dali and Blast approach 
+        align_files = [os.path.join(PDB_folder, file) for file in os.listdir(PDB_folder)]
+
+        functions = {
+            "HBs": calcHydrogenBonds,
+            "SBs": calcSaltBridges,
+            "RIB": calcRepulsiveIonicBonding,
+            "PiStack": calcPiStacking,
+            "PiCat": calcPiCation,
+            "HPh": calcHydrophobic,
+            "DiBs": calcDisulfideBonds
+        }
 
-    # Process each bond type
-    for bond_type, func in functions.items():
-        # Check if the consensus file already exists
-        consensus_file = '{}_consensus.txt'.format(bond_type)
-        if os.path.exists(consensus_file):
-            log_message("Consensus file for {} already exists, skipping.".format(bond_type), "INFO")
-            continue
+        for bond_type, func in functions.items():
+            for file in align_files:
+                try:
+                    atoms = parsePDB(file)
+                    interactions = func(atoms.select('protein'))
+                    saveInteractionsAsDummyAtoms(atoms, interactions, filename ='INT_'+bond_type+'_'+file)
+                except: pass
+    
+    else:
+        # Foldseek approach
+        mapping_file = kwargs.pop('mapping_file', 'shortlisted_resind.msa')
+        fixer = kwargs.pop('fixer', 'pdbfixer')
+        remove_tmp_files = kwargs.pop('remove_tmp_files', True)
+        n_per_plot = kwargs.pop('n_per_plot', None)
+        min_height = kwargs.pop('min_height', 8)
+        
+        functions = {
+            "HydrogenBonds": calcHydrogenBonds,
+            "SaltBridges": calcSaltBridges,
+            "RepulsiveIonicBonding": calcRepulsiveIonicBonding,
+            "PiStacking": calcPiStacking,
+            "PiCation": calcPiCation,
+            "Hydrophobic": calcHydrophobic,
+            "DisulfideBonds": calcDisulfideBonds
+        }
 
-        log_message("Processing {}".format(bond_type))
-        result = process_data(mapping_file, PDB_folder, func, bond_type, fixer)
+        # Process each bond type
+        for bond_type, func in functions.items():
+            # Check if the consensus file already exists
+            consensus_file = '{}_consensus.txt'.format(bond_type)
+            if os.path.exists(consensus_file):
+                log_message("Consensus file for {} already exists, skipping.".format(bond_type), "INFO")
+                continue
 
-        # Check if the result is None (no valid bonds found)
-        if result is None:
-            log_message("No valid {} entries found, skipping further processing.".format(bond_type), "WARNING")
-            continue
+            log_message("Processing {}".format(bond_type))
+            result = process_data(mapping_file, PDB_folder, func, bond_type, fixer)
 
-        result, fixed_files = result
+            # Check if the result is None (no valid bonds found)
+            if result is None:
+                log_message("No valid {} entries found, skipping further processing.".format(bond_type), "WARNING")
+                continue
 
-        # Proceed with plotting
-        plot_barh(result, bond_type, n_per_plot=n_per_plot, min_height=min_height)
+            result, fixed_files = result
 
-    # Remove all fixed files at the end
-    if remove_tmp_files == True:
-        for fixed_file in fixed_files:
-            if os.path.exists(fixed_file):
-                os.remove(fixed_file)
-                log_message("Removed fixed file: {}".format(fixed_file))
+            # Proceed with plotting
+            plot_barh(result, bond_type, n_per_plot=n_per_plot, min_height=min_height)
 
+        # Remove all fixed files at the end
+        if remove_tmp_files == True:
+            for fixed_file in fixed_files:
+                if os.path.exists(fixed_file):
+                    os.remove(fixed_file)
+                    log_message("Removed fixed file: {}".format(fixed_file))
 
 
 class Interactions(object):

From 566d340bedccc9dc2c4b54343dfc7115391f263d Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Wed, 27 Nov 2024 22:15:33 +0100
Subject: [PATCH 27/30] reoeganization of runFoldseek and
 calcSignatureInteractions

---
 prody/proteins/interactions.py | 113 +++++++++++++++++++++++----------
 1 file changed, 79 insertions(+), 34 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index 0e0c74a67..d0291dcea 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -3400,6 +3400,26 @@ def runFoldseek(pdb_file, chain, **kwargs):
             by default '~/Downloads/foldseek/pdb'
             To download the database use: 'foldseek databases PDB pdb tmp' in the console 
     :type database_folder: str
+
+    :arg fixer: The method for fixing lack of hydrogen bonds
+            by default is 'pdbfixer'
+    :type fixer: 'pdbfixer' or 'openbabel'
+    
+    :arg subset: subsets of atoms: 'ca', 'bb', 'heavy', 'noh', 'all'  (see matchChains())
+            by default is 'ca'
+    :type subset: str
+
+    :arg seqid: Minimum value of the sequence identity (see matchChains())
+            by default 5
+    :type seqid: float
+    
+    :arg overlap: percent overlap (see matchChains())
+            by default 50
+    :type overlap: float
+
+    :arg folder_name: Folder where the results will be collected
+            by default is 'struc_homologs'
+    :type folder_name: str
     """
     
     import os
@@ -3410,6 +3430,10 @@ def runFoldseek(pdb_file, chain, **kwargs):
     database_folder = kwargs.pop('database_folder', '~/Downloads/foldseek/pdb')
     coverage_threshold = kwargs.pop('coverage_threshold', 0.3)
     tm_threshold = kwargs.pop('tm_threshold', 0.5)    
+    folder_name = kwargs.pop('folder_name', 'struc_homologs')
+    subset = kwargs.pop('subset', 'ca')
+    seqid = kwargs.pop('seqid', 5)
+    overlap = kwargs.pop('overlap', 50)
     
     if not isinstance(pdb_file, str):
         raise TypeError('Please provide the name of the PDB file.')
@@ -3683,9 +3707,34 @@ def extract_sequence_from_pdb(pdb_file, chain, output_file):
     fp9.close()
     fp10.close()
 
-    extractMultiModelPDB('aligned_structures.pdb', folder_name='struc_homologs')
+    extractMultiModelPDB('aligned_structures.pdb', folder_name=folder_name)
     subprocess.run("rm -f inp.pdb prot.seq target.pdb temp_coord.txt temp_resind.txt temp_seq.txt", shell=True)
     subprocess.run("rm -rf tmp2 temp", shell=True)
+
+    # New part
+    pwd = os.path.abspath(folder_name)
+    list_pdbs = [pwd+'/'+ff for ff in os.listdir(pwd) if ff.endswith('.pdb')]
+    list_pdbs.sort(key=lambda x: int(''.join(filter(str.isdigit, x)))) # all structures will be aligned on model1.pdb as in the oryginal code
+    
+    LOGGER.info('Adding hydrogens to the structures..')
+    new_pdbids = fixStructuresMissingAtoms(list_pdbs, method='pdbfixer', model_residues=True, overwrite=True)
+    
+    structures = parsePDB(new_pdbids)
+    target = structures[0]
+    rmsds = []
+    for mobile in structures[1:]:
+        try:
+            LOGGER.info('Aligning the structures..')
+            i = mobile.getTitle()
+            LOGGER.info(i)
+            matches = matchChains(mobile.protein, target.protein, subset=subset, seqid=seqid, overlap=overlap)
+            m = matches[0]
+            m0_alg, T = superpose(m[0], m[1], weights=m[0].getFlags("mapped"))
+            rmsds.append(calcRMSD(m[0], m[1], weights=m[0].getFlags("mapped")))
+            source_file = pwd+'/'+'align__'+i+'.pdb'
+            writePDB(source_file, mobile)
+        except:
+            LOGGER.warn('There is a problem with {}. Change seqid or overlap parameter to include the structure.'.format(i))
     
     
 def runDali(pdb, chain, **kwargs):
@@ -3761,7 +3810,6 @@ def runDali(pdb, chain, **kwargs):
         writePDB(i[0]+i[1]+'.pdb', p)
         list_pdbs.append(i[0]+i[1]+'.pdb')
     
-    #LOGGER.info('Number of selected structures: ', len(list_pdbs))
     LOGGER.info('Adding hydrogens to the structures..')
     new_pdbids = fixStructuresMissingAtoms(list_pdbs, method='pdbfixer', model_residues=True, overwrite=True)
 
@@ -3881,6 +3929,10 @@ def calcSignatureInteractions(PDB_folder, **kwargs):
     :arg PDB_folder: Directory containing PDB model files
     :type PDB_folder: str
 
+    :arg use_prefix: Whether to use perfix to select particular file names in the PDB_folder
+            Default is True
+    :type use_prefix: bool
+
     :arg mapping_file: Aligned residue indices, MSA file type
             required in Foldseek analyisis
     :type mapping_file: str
@@ -3888,42 +3940,42 @@ def calcSignatureInteractions(PDB_folder, **kwargs):
     :arg fixer: The method for fixing lack of hydrogen bonds
             by default is 'pdbfixer'
     :type fixer: 'pdbfixer' or 'openbabel'
-    
-    :arg remove_tmp_files: Removing intermediate files that are created in the process
-            by default is True
-    :type remove_tmp_files: True or False
     """
     
     import os
     mapping_file = kwargs.get('mapping_file')
+    use_prefix = kwargs.pop('use_prefix', True)
     
-    if not mapping_file:
-        # Dali and Blast approach 
+    if use_prefix == True:
+        align_files = [os.path.join(PDB_folder, file) for file in os.listdir(PDB_folder) if file.startswith("align_")]
+
+    else:
         align_files = [os.path.join(PDB_folder, file) for file in os.listdir(PDB_folder)]
 
-        functions = {
-            "HBs": calcHydrogenBonds,
-            "SBs": calcSaltBridges,
-            "RIB": calcRepulsiveIonicBonding,
-            "PiStack": calcPiStacking,
-            "PiCat": calcPiCation,
-            "HPh": calcHydrophobic,
-            "DiBs": calcDisulfideBonds
-        }
+    functions_dict = {
+        "HBs": calcHydrogenBonds,
+        "SBs": calcSaltBridges,
+        "RIB": calcRepulsiveIonicBonding,
+        "PiStack": calcPiStacking,
+        "PiCat": calcPiCation,
+        "HPh": calcHydrophobic,
+        "DiBs": calcDisulfideBonds
+    }
 
-        for bond_type, func in functions.items():
-            for file in align_files:
-                try:
-                    atoms = parsePDB(file)
-                    interactions = func(atoms.select('protein'))
-                    saveInteractionsAsDummyAtoms(atoms, interactions, filename ='INT_'+bond_type+'_'+file)
-                except: pass
+    for bond_type, func in functions_dict.items():
+        for file in align_files:
+            try:
+                LOGGER.info(file)
+                atoms = parsePDB(file)
+                interactions = func(atoms.select('protein'))
+                saveInteractionsAsDummyAtoms(atoms, interactions, filename=file.rstrip(file.split('/')[-1])+'INT_'+bond_type+'_'+file.split('/')[-1])
+            except: pass
     
-    else:
-        # Foldseek approach
+    
+    if mapping_file:
+        # for MSA file (Foldseek)
         mapping_file = kwargs.pop('mapping_file', 'shortlisted_resind.msa')
         fixer = kwargs.pop('fixer', 'pdbfixer')
-        remove_tmp_files = kwargs.pop('remove_tmp_files', True)
         n_per_plot = kwargs.pop('n_per_plot', None)
         min_height = kwargs.pop('min_height', 8)
         
@@ -3958,13 +4010,6 @@ def calcSignatureInteractions(PDB_folder, **kwargs):
             # Proceed with plotting
             plot_barh(result, bond_type, n_per_plot=n_per_plot, min_height=min_height)
 
-        # Remove all fixed files at the end
-        if remove_tmp_files == True:
-            for fixed_file in fixed_files:
-                if os.path.exists(fixed_file):
-                    os.remove(fixed_file)
-                    log_message("Removed fixed file: {}".format(fixed_file))
-
 
 class Interactions(object):
 

From 12841bbd3a117164610221951b9f949624e648e1 Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Thu, 28 Nov 2024 12:21:30 +0100
Subject: [PATCH 28/30] docs fix [InSty]

---
 prody/proteins/interactions.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index d0291dcea..99fe82bf4 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -3749,35 +3749,35 @@ def runDali(pdb, chain, **kwargs):
 
     :arg cutoff_len: Length of aligned residues < cutoff_len (must be an integer or a float between 0 and 1)
             See searchDali for more details 
-            by default 0.5
+            Default is 0.5
     :type cutoff_len: float
 
     :arg cutoff_rmsd: RMSD cutoff (see searchDali)
-            by default 1.0
+            Default is 1.0
     :type cutoff_rmsd: float
     
-    :arg subsetDali: fullPDB, PDB25, PDB50, PDB90
-            by default is 'fullPDB'
-    :type subsetDali: str    
+    :arg subset_Dali: fullPDB, PDB25, PDB50, PDB90
+            Default is 'fullPDB'
+    :type subset_Dali: str    
     
     :arg fixer: The method for fixing lack of hydrogen bonds
-            by default is 'pdbfixer'
+            Default is 'pdbfixer'
     :type fixer: 'pdbfixer' or 'openbabel'
     
     :arg subset: subsets of atoms: 'ca', 'bb', 'heavy', 'noh', 'all'  (see matchChains())
-            by default is 'ca'
+            Default is 'ca'
     :type subset: str
 
     :arg seqid: Minimum value of the sequence identity (see matchChains())
-            by default 5
+            Default 5
     :type seqid: float
     
     :arg overlap: percent overlap (see matchChains())
-            by default 50
+            Default 50
     :type overlap: float
 
     :arg folder_name: Folder where the results will be collected
-            by default is 'struc_homologs'
+            Default is 'struc_homologs'
     :type folder_name: str
     """
 

From c7d4a9979c3dc7d6554840c060d5e1ba7174a77b Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Sun, 1 Dec 2024 21:07:30 +0100
Subject: [PATCH 29/30] final rearrangement of calcSignatureInteractions with
 MSA file

---
 prody/proteins/interactions.py | 76 +++++++++++-----------------------
 1 file changed, 24 insertions(+), 52 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index 99fe82bf4..b4c749ee9 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -356,7 +356,7 @@ def append_residue_code(residue_num, residue_dict):
     aa_code = residue_dict.get(residue_num, "X")  # Use "X" for unknown residues
     return "{}{}".format(aa_code, residue_num)
 
-def process_data(mapping_file, pdb_folder, interaction_func, bond_type, fixer):
+def process_data(mapping_file, pdb_folder, interaction_func, bond_type, files_for_analysis):
     """Process the mapping file and the PDB folder to compute interaction counts and percentages."""
     log_message("Loading mapping file: {}".format(mapping_file))
 
@@ -382,33 +382,14 @@ def process_data(mapping_file, pdb_folder, interaction_func, bond_type, fixer):
     processed_files = 0  # To track the number of files successfully processed
     fixed_files = []  # Store paths of fixed files to remove at the end
 
-    for i, files in enumerate(os.listdir(pdb_folder)):
-        # Skip any file that has already been processed (files with 'addH_' prefix)
-        if files.startswith('addH_'):
-            log_message("Skipping already fixed file: {}".format(files), "INFO")
-            continue
-
+    for i, files in enumerate(files_for_analysis):
         log_message("Processing file {}: {}".format(i + 1, files))
         
-        pdb_file_path = os.path.join(pdb_folder, files)
-        fixed_pdb_path = pdb_file_path.replace(files, 'addH_' + files)
-
-        # Check if the fixed file already exists, skip fixing if it does
-        if not os.path.exists(fixed_pdb_path):
-            try:
-                log_message("Running fixer on {} using {}.".format(pdb_file_path, fixer))
-                addMissingAtoms(pdb_file_path, method=fixer)
-            except Exception as e:
-                log_message("Error adding missing atoms: {}".format(e), "ERROR")
-                continue
-        else:
-            log_message("Using existing fixed file: {}".format(fixed_pdb_path))
-
         try:
-            coords = parsePDB(fixed_pdb_path)
+            coords = parsePDB(files)
             atoms = coords.select('protein')
-            interactions = Interactions()
             bonds = interaction_func(atoms)
+            saveInteractionsAsDummyAtoms(atoms, bonds, filename=files.rstrip(files.split('/')[-1])+'INT_'+bond_type+'_'+files.split('/')[-1])
         except Exception as e:
             log_message("Error processing PDB file {}: {}".format(files, e), "ERROR")
             continue
@@ -419,7 +400,7 @@ def process_data(mapping_file, pdb_folder, interaction_func, bond_type, fixer):
             continue
 
         processed_files += 1  # Increment successfully processed files
-        fixed_files.append(fixed_pdb_path)
+        fixed_files.append(files)
 
         if processed_files == 1:  # First valid file with bonds determines target indices
             for entries in bonds:
@@ -3572,6 +3553,12 @@ def extract_sequence_from_pdb(pdb_file, chain, output_file):
         awk_command = "awk '{{if (substr($0, 22, 1) == \"{0}\") print}}'".format(chain)
         subprocess.run("cat ./temp/{0} | grep -E '^(ATOM|HETATM)' | {1} > target.pdb".format(fname, awk_command), shell=True)
 
+        # Check if target.pdb has fewer than 5 lines
+        if sum(1 for _ in open("target.pdb")) < 5:
+            LOGGER.info("target.pdb has fewer than 5 lines. Skipping further processing.")
+            subprocess.run(['rm', 'target.pdb'])
+            continue
+
         with open('target.pdb', 'r') as target_file:
             file4 = target_file.readlines()
 
@@ -3936,10 +3923,6 @@ def calcSignatureInteractions(PDB_folder, **kwargs):
     :arg mapping_file: Aligned residue indices, MSA file type
             required in Foldseek analyisis
     :type mapping_file: str
-
-    :arg fixer: The method for fixing lack of hydrogen bonds
-            by default is 'pdbfixer'
-    :type fixer: 'pdbfixer' or 'openbabel'
     """
     
     import os
@@ -3962,35 +3945,23 @@ def calcSignatureInteractions(PDB_folder, **kwargs):
         "DiBs": calcDisulfideBonds
     }
 
-    for bond_type, func in functions_dict.items():
-        for file in align_files:
-            try:
-                LOGGER.info(file)
-                atoms = parsePDB(file)
-                interactions = func(atoms.select('protein'))
-                saveInteractionsAsDummyAtoms(atoms, interactions, filename=file.rstrip(file.split('/')[-1])+'INT_'+bond_type+'_'+file.split('/')[-1])
-            except: pass
-    
-    
+    if not mapping_file:
+        for bond_type, func in functions_dict.items():
+            for file in align_files:
+                try:
+                    LOGGER.info(file)
+                    atoms = parsePDB(file)
+                    interactions = func(atoms.select('protein'))
+                    saveInteractionsAsDummyAtoms(atoms, interactions, filename=file.rstrip(file.split('/')[-1])+'INT_'+bond_type+'_'+file.split('/')[-1])
+                except: pass
+        
     if mapping_file:
         # for MSA file (Foldseek)
-        mapping_file = kwargs.pop('mapping_file', 'shortlisted_resind.msa')
-        fixer = kwargs.pop('fixer', 'pdbfixer')
         n_per_plot = kwargs.pop('n_per_plot', None)
         min_height = kwargs.pop('min_height', 8)
         
-        functions = {
-            "HydrogenBonds": calcHydrogenBonds,
-            "SaltBridges": calcSaltBridges,
-            "RepulsiveIonicBonding": calcRepulsiveIonicBonding,
-            "PiStacking": calcPiStacking,
-            "PiCation": calcPiCation,
-            "Hydrophobic": calcHydrophobic,
-            "DisulfideBonds": calcDisulfideBonds
-        }
-
         # Process each bond type
-        for bond_type, func in functions.items():
+        for bond_type, func in functions_dict.items():
             # Check if the consensus file already exists
             consensus_file = '{}_consensus.txt'.format(bond_type)
             if os.path.exists(consensus_file):
@@ -3998,7 +3969,7 @@ def calcSignatureInteractions(PDB_folder, **kwargs):
                 continue
 
             log_message("Processing {}".format(bond_type))
-            result = process_data(mapping_file, PDB_folder, func, bond_type, fixer)
+            result = process_data(mapping_file, PDB_folder, func, bond_type, files_for_analysis=align_files)
 
             # Check if the result is None (no valid bonds found)
             if result is None:
@@ -4009,6 +3980,7 @@ def calcSignatureInteractions(PDB_folder, **kwargs):
 
             # Proceed with plotting
             plot_barh(result, bond_type, n_per_plot=n_per_plot, min_height=min_height)
+            
 
 
 class Interactions(object):

From c159a34e2edf8eaa362a8f327e11855e62056dcd Mon Sep 17 00:00:00 2001
From: karolamik13 <karolami@pitt.edu>
Date: Sun, 1 Dec 2024 21:43:47 +0100
Subject: [PATCH 30/30] Changes in docs

---
 prody/proteins/interactions.py | 100 ++++++++++++++++-----------------
 1 file changed, 48 insertions(+), 52 deletions(-)

diff --git a/prody/proteins/interactions.py b/prody/proteins/interactions.py
index b4c749ee9..0cad12a09 100644
--- a/prody/proteins/interactions.py
+++ b/prody/proteins/interactions.py
@@ -471,11 +471,11 @@ def plot_barh(result, bond_type, **kwargs):
     """Plot horizontal bar plots of percentages of interactions, splitting the data into fixed-sized plots.
 
     :arg n_per_plot: The number of results per one plot
-        by default is 20 if set to None 
+        Default is 20 if set to None 
     :type n_per_plot: int
     
     :arg min_height: Size of the bar plot
-        by default is 8
+        Default is 8
     :type min_height: int
     """
 
@@ -614,15 +614,15 @@ def calcSASA(atoms, **kwargs):
     :type atoms: :class:`.Atomic`
     
     :arg selection: selection string
-        by default all the protein structure is used
+        Default all the protein structure is used
     :type selection: str
     
     :arg sasa_cutoff: cutoff for SASA values
-        default is 0.0
+        Default is 0.0
     :type sasa_cutoff: float, int
 
     :arg resnames: residues name included
-        default is False
+        Default is False
     :type resnames: bool    
     """
     
@@ -672,19 +672,19 @@ def calcVolume(atoms, **kwargs):
     :type atoms: :class:`.Atomic`
     
     :arg selection: selection string
-        by default all the protein structure is used
+        Default all the protein structure is used
     :type selection: str
     
     :arg volume_cutoff: cutoff for volume
-        default is 0.0 to include all the results
+        Default is 0.0 to include all the results
     :type sasa_volume: float, int
 
     :arg split_residues: it will provide values for each residue
-        default is False
+        Default is False
     :type split_residues: bool
 
     :arg resnames: residues name included
-        default is False
+        Default is False
         True - will give residue names and values for each residue
         False - will give only the values for each residues
     :type resnames: bool
@@ -2114,35 +2114,35 @@ def showInteractionsGraph(statistics, **kwargs):
     :type statistics: list
     
     :arg cutoff: Minimal number of counts per residue in the trajectory
-        by default 0.1.
+        Default is 0.1.
     :type cutoff: int, float
 
     :arg code: representation of the residues, 3-letter or 1-letter
-        by default 3-letter
+        Default is 3-letter
     :type code: str
 
     :arg multiple_chains: display chain name for structure with many chains
-        by default False
+        Default is False
     :type multiple_chains: bool
     
     :arg edge_cmap: color of the residue connection
-        by default plt.cm.Blues (blue color)
+        Default is plt.cm.Blues (blue color)
     :type edge_cmap: str
 
     :arg node_size: size of the nodes which describes residues
-        by default 300
+        Default is 300
     :type node_size: int
     
     :arg node_distance: value which will scale residue-residue interactions
-        by default 5
+        Default is 5
     :type node_distance: int
 
     :arg font_size: size of the font
-        by default 14
+        Default is 14
     :type font_size: int
 
     :arg seed: random number which affect the distribution of residues
-        by default 42
+        Default is 42
     :type seed: int
     """
     
@@ -2278,11 +2278,11 @@ def showInteractionsHist(statistics, **kwargs):
     :type statistics: list
     
     :arg clip: maxmimal number of residue pairs on the bar plot
-        by default 20
+        Default is 20
     :type clip: int
 
     :arg font_size: size of the font
-        by default 18
+        Default is 18
     :type font_size: int
     """
 
@@ -2584,7 +2584,7 @@ def listLigandInteractions(PLIP_output, **kwargs):
     
     :arg output: parameter to print the interactions on the screen
                  while analyzing the structure (True | False)
-                 by default is False
+                 Default is False
     :type output: bool     
     
     Note that five types of interactions are considered: hydrogen bonds, salt bridges, 
@@ -2966,23 +2966,23 @@ def calcSminaBindingAffinity(atoms, trajectory=None, **kwargs):
     :arg protein_selection: selection string for the protein and other compoment
                             of the system that should be included,
                             e.g. "protein and chain A",
-                            by default "protein" 
+                            Default is "protein" 
     :type protein_selection: str
     
     :arg ligand_selection: selection string for ligand,
                            e.g. "resname ADP",
-                           by default "all not protein"
+                           Default is "all not protein"
     :type ligand_selection: str
     
     :arg ligand_selection: scoring function (vina or vinardo)
-                           by default is "vina"
+                           Default is "vina"
     
     :type ligand_selection: str
     
     :arg atom_terms: write per-atom interaction term values.
                      It will provide more information as dictionary for each frame/model,
                      and details for atom terms will be saved in terms_*frame_number*.txt,    
-                     by default is False
+                     Default is False
 
     :type atom_terms: bool
      
@@ -3370,38 +3370,37 @@ def runFoldseek(pdb_file, chain, **kwargs):
     :type chain: str
 
     :arg coverage_threshold: Coverage threshold 
-            by default 0.3
+            Default is 0.3
     :type coverage_threshold: float
 
     :arg tm_threshold: TM-score threshold
-            by default 0.5
+            Default is 0.5
     :type tm_threshold: float
     
     :arg database_folder: Folder with the database
-            by default '~/Downloads/foldseek/pdb'
+            Default is '~/Downloads/foldseek/pdb'
             To download the database use: 'foldseek databases PDB pdb tmp' in the console 
     :type database_folder: str
 
     :arg fixer: The method for fixing lack of hydrogen bonds
-            by default is 'pdbfixer'
+            Default is 'pdbfixer'
     :type fixer: 'pdbfixer' or 'openbabel'
     
     :arg subset: subsets of atoms: 'ca', 'bb', 'heavy', 'noh', 'all'  (see matchChains())
-            by default is 'ca'
+            Default is 'ca'
     :type subset: str
 
     :arg seqid: Minimum value of the sequence identity (see matchChains())
-            by default 5
+            Default is 5
     :type seqid: float
     
     :arg overlap: percent overlap (see matchChains())
-            by default 50
+            Default is 50
     :type overlap: float
 
     :arg folder_name: Folder where the results will be collected
-            by default is 'struc_homologs'
-    :type folder_name: str
-    """
+            Default is 'struc_homologs'
+    :type folder_name: str """
     
     import os
     import subprocess
@@ -3756,17 +3755,16 @@ def runDali(pdb, chain, **kwargs):
     :type subset: str
 
     :arg seqid: Minimum value of the sequence identity (see matchChains())
-            Default 5
+            Default is 5
     :type seqid: float
     
     :arg overlap: percent overlap (see matchChains())
-            Default 50
+            Default is 50
     :type overlap: float
 
     :arg folder_name: Folder where the results will be collected
             Default is 'struc_homologs'
-    :type folder_name: str
-    """
+    :type folder_name: str """
 
     import os
     import shutil
@@ -3832,25 +3830,24 @@ def runBLAST(pdb, chain, **kwargs):
     :type chain: str
 
     :arg fixer: The method for fixing lack of hydrogen bonds
-            by default is 'pdbfixer'
+            Default is 'pdbfixer'
     :type fixer: 'pdbfixer' or 'openbabel'
     
     :arg subset: subsets of atoms: 'ca', 'bb', 'heavy', 'noh', 'all'  (see matchChains())
-            by default is 'bb'
+            Default is 'bb'
     :type subset: str
 
     :arg seqid: Minimum value of the sequence identity (see matchChains())
-            by default 90
+            Default is 90
     :type seqid: float
     
     :arg overlap: percent overlap (see matchChains())
-            by default 50
+            Default is 50
     :type overlap: float
 
     :arg folder_name: Folder where the results will be collected
-            by default is 'struc_homologs'
-    :type folder_name: str
-    """
+            Default is 'struc_homologs'
+    :type folder_name: str """
     
     import os
     import shutil
@@ -3922,8 +3919,7 @@ def calcSignatureInteractions(PDB_folder, **kwargs):
 
     :arg mapping_file: Aligned residue indices, MSA file type
             required in Foldseek analyisis
-    :type mapping_file: str
-    """
+    :type mapping_file: str """
     
     import os
     mapping_file = kwargs.get('mapping_file')
@@ -4415,7 +4411,7 @@ def buildInteractionMatrixEnergy(self, **kwargs):
         """Build matrix with interaction energy comming from energy of pairs of specific residues.
         
         :arg energy_list_type: name of the list with energies 
-                            default is 'IB_solv'
+                            Default is 'IB_solv'
         :type energy_list_type: 'IB_nosolv', 'IB_solv', 'CS'
         """
         
@@ -4578,7 +4574,7 @@ def getFrequentInteractors(self, contacts_min=2):
             (7) Disulfide bonds (disb)
         
         :arg contacts_min: Minimal number of contacts which residue may form with other residues, 
-                           by default 2.
+                           Default is 2.
         :type contacts_min: int  """
 
         atoms = self._atoms   
@@ -5611,7 +5607,7 @@ def getLigandInteractions(self, **kwargs):
         :arg include_frames: used with filters, it will leave selected keyword in orginal 
                     lists, if False it will collect selected interactions in one list,
                     Use True to assign new selection using setLigandInteractions.
-                    by default True
+                    Default is True
         :type include_frames: bool            
         """
         
@@ -5661,7 +5657,7 @@ def getLigandInteractionsNumber(self, **kwargs):
         be a total number of interactions or it can be divided into interaction types.
         
         :arg types: Interaction types can be included (True) or not (False).
-                    by default is True. 
+                    Default is True. 
         :type types: bool
         """
 
@@ -5803,7 +5799,7 @@ def saveInteractionsPDB(self, **kwargs):
         :type filename: str  
         
         :arg ligand_sele: ligand selection,
-                          by default is 'all not (protein or water or ion)'.
+                          Default is 'all not (protein or water or ion)'.
         :type ligand_sele: str          
         """