MannLabs · mschwoer · Nov 18, 2024 · Oct 8, 2024 · Oct 8, 2024 · Oct 8, 2024
diff --git a/.secrets.baseline b/.secrets.baseline
@@ -139,6 +139,16 @@
     }
   ],
   "results": {
+    ".github/workflows/create_release.yml": [
+      {
+        "type": "Secret Keyword",
+        "filename": ".github/workflows/create_release.yml",
+        "hashed_secret": "3e26d6750975d678acb8fa35a0f69237881576b0",
+        "is_verified": false,
+        "line_number": 15,
+        "is_secret": false
+      }
+    ],
     "docs/workflow_mq.html": [
       {
         "type": "Base64 High Entropy String",
@@ -150,5 +160,5 @@
       }
     ]
   },
-  "generated_at": "2024-09-18T09:54:14Z"
+  "generated_at": "2024-10-08T15:30:41Z"
 }
diff --git a/README.md b/README.md
@@ -155,6 +155,13 @@ You can run the checks yourself using:
 pre-commit run --all-files
 ```
 
+##### The `detect-secrets` hook fails
+This is because you added some code that was identified as a potential secret.
+1. Run `detect-secrets scan --exclude-files testfiles --exclude-lines '"(hash|id|image/\w+)":.*' > .secrets.baseline`
+(check `.pre-commit-config.yaml` for the exact parameters)
+2. Run `detect-secrets audit .secrets.baseline` and check if the detected 'secret' is actually a secret
+3. Commit the latest version of `.secrets.baseline`
+
 
 
 ---

diff --git a/alphastats/DataSet.py b/alphastats/DataSet.py
@@ -134,7 +134,8 @@ def _get_init_dataset(
 
         return rawmat, mat, metadata, preprocessing_info
 
-    def _check_loader(self, loader):
+    @staticmethod
+    def _check_loader(loader):
         """Checks if the Loader is from class AlphaPeptLoader, MaxQuantLoader, DIANNLoader, FragPipeLoader
 
         Args:
@@ -252,6 +253,19 @@ def ancova(
         """A wrapper for Statistics.ancova(), see documentation there."""
         return self._get_statistics().ancova(protein_id, covar, between)
 
+    def multicova_analysis(
+        self,
+        covariates: list,
+        n_permutations: int = 3,
+        fdr: float = 0.05,
+        s0: float = 0.05,
+        subset: dict = None,
+    ) -> Tuple[pd.DataFrame, list]:
+        """A wrapper for Statistics.multicova_analysis(), see documentation there."""
+        return self._get_statistics().multicova_analysis(
+            covariates, n_permutations, fdr, s0, subset
+        )
+
     @check_for_missing_values
     def plot_pca(self, group: Optional[str] = None, circle: bool = False):
         """Plot Principal Component Analysis (PCA)

diff --git a/alphastats/DataSet_Statistics.py b/alphastats/DataSet_Statistics.py
@@ -1,5 +1,5 @@
 from functools import lru_cache
-from typing import Dict, Union
+from typing import Dict, Tuple, Union
 
 import pandas as pd
 import pingouin
@@ -9,6 +9,7 @@
 from alphastats.statistics.DifferentialExpressionAnalysis import (
     DifferentialExpressionAnalysis,
 )
+from alphastats.statistics.MultiCovaAnalysis import MultiCovaAnalysis
 from alphastats.utils import ignore_warning
 
 
@@ -121,36 +122,37 @@ def ancova(
         ancova_df = pingouin.ancova(df, dv=protein_id, covar=covar, between=between)
         return ancova_df
 
-    # @ignore_warning(RuntimeWarning)
-    # def multicova_analysis(  # TODO never used outside of tests .. how does this relate to multicova.py?
-    #     self,
-    #     covariates: list,
-    #     n_permutations: int = 3,
-    #     fdr: float = 0.05,
-    #     s0: float = 0.05,
-    #     subset: dict = None,
-    # ) -> Union[pd.DataFrame, list]:
-    #     """Perform Multicovariat Analysis
-    #     will return a pandas DataFrame with the results and a list of volcano plots (for each covariat)
-    #
-    #     Args:
-    #         covariates (list): list of covariates, column names in metadata
-    #         n_permutations (int, optional): number of permutations. Defaults to 3.
-    #         fdr (float, optional): False Discovery Rate. Defaults to 0.05.
-    #         s0 (float, optional): . Defaults to 0.05.
-    #         subset (dict, optional): for categorical covariates . Defaults to None.
-    #
-    #     Returns:
-    #         pd.DataFrame: Multicova Analysis results
-    #     """
-    #
-    #     res, plot_list = MultiCovaAnalysis(
-    #         dataset=self,  # TODO fix .. does this write to it?
-    #         covariates=covariates,
-    #         n_permutations=n_permutations,
-    #         fdr=fdr,
-    #         s0=s0,
-    #         subset=subset,
-    #         plot=True,
-    #     ).calculate()
-    #     return res, plot_list
+    @ignore_warning(RuntimeWarning)
+    def multicova_analysis(  # TODO never used outside of tests .. how does this relate to multicova.py?
+        self,
+        covariates: list,
+        n_permutations: int = 3,
+        fdr: float = 0.05,
+        s0: float = 0.05,
+        subset: dict = None,
+    ) -> Tuple[pd.DataFrame, list]:
+        """Perform Multicovariate Analysis
+        will return a pandas DataFrame with the results and a list of volcano plots (for each covariate)
+
+        Args:
+            covariates (list): list of covariates, column names in metadata
+            n_permutations (int, optional): number of permutations. Defaults to 3.
+            fdr (float, optional): False Discovery Rate. Defaults to 0.05.
+            s0 (float, optional): . Defaults to 0.05.
+            subset (dict, optional): for categorical covariates, structure {"column_name": ["group1", "group2"]}. Defaults to None.
+
+        Returns:
+            Tuple[pd.DataFrame, list]: Multicova Analysis results and the plot list returned by MultiCovaAnalysis.calculate()
+        """
+
+        res, plot_list = MultiCovaAnalysis(
+            mat=self.mat,
+            metadata=self.metadata,
+            covariates=covariates,
+            n_permutations=n_permutations,
+            fdr=fdr,
+            s0=s0,
+            subset=subset,
+        ).calculate()
+
+        return res, plot_list
diff --git a/alphastats/plots/PlotUtils.py b/alphastats/plots/PlotUtils.py
@@ -41,9 +41,14 @@ class PlotlyObject(plotly.graph_objs._figure.Figure):
 class PlotUtils:
     @staticmethod
     def _update_colors_plotly(fig, color_dict):
-        # plotly doesnt allow to assign color to certain group
-        # update instead the figure in form of a dict
-        # color_dict with group_variable/legendgroup as key, and corresponding color as value
+        # TODO revisit this comment:
+        #  plotly doesn't allow assigning a color to a certain group,
+        #  so the figure is updated in the form of a dict instead, using
+        #  color_dict with group_variable/legendgroup as key, and corresponding color as value
+        # update:
+        # https://plotly.com/python-api-reference/generated/generated/plotly.graph_objects.Figure.update_traces.html
+        # + selector to set individual color or something like:
+        # plot.for_each_trace(lambda t: t.update(marker_color=color_dict.get(t.legendgroup)))
         fig_dict = fig.to_plotly_json()
         data_dict_list = fig_dict.get("data")
         for count, group in enumerate(data_dict_list):

diff --git a/alphastats/statistics/MultiCovaAnalysis.py b/alphastats/statistics/MultiCovaAnalysis.py
@@ -1,6 +1,7 @@
 import warnings
 
 import numpy as np
+import pandas as pd
 import plotly.express as px
 
 from alphastats.keys import Cols
@@ -10,15 +11,21 @@
 class MultiCovaAnalysis:
     def __init__(
         self,
-        dataset,
+        *,
+        mat: pd.DataFrame,
+        metadata: pd.DataFrame,
         covariates: list,
         n_permutations: int = 3,
         fdr: float = 0.05,
         s0: float = 0.05,
         subset: dict = None,
         plot: bool = False,
     ):
-        self.dataset = dataset  # TODO pass only .mat, .metadata
+        self.metadata_ori = metadata
+        self.mat = mat
+
+        self.metadata = None  # TODO check if the distinction between metadata and metadata_ori is necessary
+
         self.covariates = covariates
         self.n_permutations = n_permutations
         self.fdr = fdr
@@ -38,25 +45,25 @@ def _subset_metadata(self):
             # dict structure {"column_name": ["group1", "group2"]}
             subset_column = list(self.subset.keys())[0]
             groups = self.subset.get(subset_column)
-            self.metadata = self.dataset.metadata[
-                self.dataset.metadata[subset_column].isin(groups)
+            self.metadata = self.metadata_ori[
+                self.metadata_ori[subset_column].isin(groups)
             ][columns_to_keep]
 
         else:
-            self.metadata = self.dataset.metadata[columns_to_keep]
+            self.metadata = self.metadata_ori[columns_to_keep]
 
     def _check_covariat_input(self):
         # check whether covariates in metadata column
         misc_covariates = list(
-            set(self.covariates) - set(self.dataset.metadata.columns.to_list())
+            set(self.covariates) - set(self.metadata_ori.columns.to_list())
         )
         if len(misc_covariates) > 0:
             warnings.warn(f"Covariates: {misc_covariates} are not found in Metadata.")
             self.covariates = [x for x in self.covariates if x not in misc_covariates]
 
     def _check_na_values(self):
         for covariate in self.covariates:
-            if self.dataset.metadata[covariate].isna().any():
+            if self.metadata_ori[covariate].isna().any():
                 self.covariates.remove(covariate)
                 warnings.warn(
                     f"Covariate: {covariate} contains missing values in metadata and will not be used for analysis."
@@ -98,7 +105,7 @@ def _convert_string_to_binary(self):
                     self.covariates.remove(col)
 
     def _prepare_matrix(self):
-        transposed = self.dataset.mat.transpose()
+        transposed = self.mat.transpose()
         transposed[Cols.INDEX] = transposed.index
         transposed = transposed.reset_index(drop=True)
         self.transposed = transposed[self.metadata[Cols.SAMPLE].to_list()]
@@ -134,7 +141,7 @@ def calculate(self):
             fdr=self.fdr,
             s0=self.s0,
         )
-        res[Cols.INDEX] = self.dataset.mat.columns.to_list()
+        res[Cols.INDEX] = self.mat.columns.to_list()
         plot_list = []
 
         if self.plot:

diff --git a/tests/test_DataSet.py b/tests/test_DataSet.py
@@ -753,8 +753,6 @@ def test_batch_correction(self):
         first_value = self.obj.mat.values[0, 0]
         self.assertTrue(np.isclose(2.624937690577153e-08, first_value))
 
-    # TODO this opens a plot in a browser window
-    @skip  # TODO multicova_analysis is unused
     def test_multicova_analysis_invalid_covariates(self):
         self.obj.preprocess(imputation="knn", normalization="zscore", subset=True)
         res, _ = self.obj.multicova_analysis(