Skip to content

Commit

Permalink
Merge pull request #347 from MannLabs/enable_multicova
Browse files Browse the repository at this point in the history
Enable multicova
  • Loading branch information
mschwoer authored Nov 18, 2024
2 parents 257d05c + 24b3232 commit 670ec10
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 49 deletions.
16 changes: 15 additions & 1 deletion alphastats/DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,8 @@ def _get_init_dataset(

return rawmat, mat, metadata, preprocessing_info

def _check_loader(self, loader):
@staticmethod
def _check_loader(loader):
"""Checks if the Loader is from class AlphaPeptLoader, MaxQuantLoader, DIANNLoader, FragPipeLoader
Args:
Expand Down Expand Up @@ -256,6 +257,19 @@ def ancova(
"""A wrapper for Statistics.ancova(), see documentation there."""
return self._get_statistics().ancova(protein_id, covar, between)

def multicova_analysis(
    self,
    covariates: list,
    n_permutations: int = 3,
    fdr: float = 0.05,
    s0: float = 0.05,
    subset: dict = None,
) -> Tuple[pd.DataFrame, list]:
    """Delegate to Statistics.multicova_analysis(), see documentation there.

    All arguments are forwarded unchanged, positionally and in the same
    order as they appear in this signature.
    """
    statistics = self._get_statistics()
    return statistics.multicova_analysis(covariates, n_permutations, fdr, s0, subset)

@check_for_missing_values
def plot_pca(self, group: Optional[str] = None, circle: bool = False):
"""Plot Principal Component Analysis (PCA)
Expand Down
70 changes: 36 additions & 34 deletions alphastats/DataSet_Statistics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from functools import lru_cache
from typing import Dict, Union
from typing import Dict, Tuple, Union

import pandas as pd
import pingouin
Expand All @@ -9,6 +9,7 @@
from alphastats.statistics.DifferentialExpressionAnalysis import (
DifferentialExpressionAnalysis,
)
from alphastats.statistics.MultiCovaAnalysis import MultiCovaAnalysis
from alphastats.utils import ignore_warning


Expand Down Expand Up @@ -121,36 +122,37 @@ def ancova(
ancova_df = pingouin.ancova(df, dv=protein_id, covar=covar, between=between)
return ancova_df

# @ignore_warning(RuntimeWarning)
# def multicova_analysis( # TODO never used outside of tests .. how does this relate to multicova.py?
# self,
# covariates: list,
# n_permutations: int = 3,
# fdr: float = 0.05,
# s0: float = 0.05,
# subset: dict = None,
# ) -> Union[pd.DataFrame, list]:
# """Perform Multicovariat Analysis
# will return a pandas DataFrame with the results and a list of volcano plots (for each covariat)
#
# Args:
# covariates (list): list of covariates, column names in metadata
# n_permutations (int, optional): number of permutations. Defaults to 3.
# fdr (float, optional): False Discovery Rate. Defaults to 0.05.
# s0 (float, optional): . Defaults to 0.05.
# subset (dict, optional): for categorical covariates . Defaults to None.
#
# Returns:
# pd.DataFrame: Multicova Analysis results
# """
#
# res, plot_list = MultiCovaAnalysis(
# dataset=self, # TODO fix .. does this write to it?
# covariates=covariates,
# n_permutations=n_permutations,
# fdr=fdr,
# s0=s0,
# subset=subset,
# plot=True,
# ).calculate()
# return res, plot_list
@ignore_warning(RuntimeWarning)
def multicova_analysis(  # TODO clarify how this relates to multicova.py
    self,
    covariates: list,
    n_permutations: int = 3,
    fdr: float = 0.05,
    s0: float = 0.05,
    subset: dict = None,
) -> Tuple[pd.DataFrame, list]:
    """Perform a multi-covariate analysis on this object's matrix and metadata.

    Builds a MultiCovaAnalysis from ``self.mat`` and ``self.metadata`` and
    returns whatever its ``calculate()`` yields.

    Args:
        covariates (list): list of covariates, column names in metadata
        n_permutations (int, optional): number of permutations. Defaults to 3.
        fdr (float, optional): False Discovery Rate. Defaults to 0.05.
        s0 (float, optional): passed through to MultiCovaAnalysis. Defaults to 0.05.
        subset (dict, optional): for categorical covariates. Defaults to None.

    Returns:
        Tuple[pd.DataFrame, list]: the Multicova Analysis results and the
        list of plots produced by MultiCovaAnalysis.calculate().
    """
    analysis = MultiCovaAnalysis(
        mat=self.mat,
        metadata=self.metadata,
        covariates=covariates,
        n_permutations=n_permutations,
        fdr=fdr,
        s0=s0,
        subset=subset,
    )
    # Unpack explicitly so an unexpected return shape fails here, not in the caller.
    results, plots = analysis.calculate()
    return results, plots
11 changes: 8 additions & 3 deletions alphastats/plots/PlotUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,14 @@ class PlotlyObject(plotly.graph_objs._figure.Figure):
class PlotUtils:
@staticmethod
def _update_colors_plotly(fig, color_dict):
# plotly doesnt allow to assign color to certain group
# update instead the figure in form of a dict
# color_dict with group_variable/legendgroup as key, and corresponding color as value
# TODO revisit this comment:
# plotly doesn't allow assigning a color to a certain group,
# so the figure is updated in the form of a dict instead:
# color_dict has group_variable/legendgroup as key and the corresponding color as value
# update:
# https://plotly.com/python-api-reference/generated/generated/plotly.graph_objects.Figure.update_traces.html
# + selector to set individual color or something like:
# plot.for_each_trace(lambda t: t.update(marker_color=color_dict.get(t.legendgroup)))
fig_dict = fig.to_plotly_json()
data_dict_list = fig_dict.get("data")
for count, group in enumerate(data_dict_list):
Expand Down
25 changes: 16 additions & 9 deletions alphastats/statistics/MultiCovaAnalysis.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import warnings

import numpy as np
import pandas as pd
import plotly.express as px

from alphastats.keys import Cols
Expand All @@ -10,15 +11,21 @@
class MultiCovaAnalysis:
def __init__(
self,
dataset,
*,
mat: pd.DataFrame,
metadata: pd.DataFrame,
covariates: list,
n_permutations: int = 3,
fdr: float = 0.05,
s0: float = 0.05,
subset: dict = None,
plot: bool = False,
):
self.dataset = dataset # TODO pass only .mat, .metadata
self.metadata_ori = metadata
self.mat = mat

self.metadata = None # TODO check if the distinction between metadata and metadata_ori is necessary

self.covariates = covariates
self.n_permutations = n_permutations
self.fdr = fdr
Expand All @@ -38,25 +45,25 @@ def _subset_metadata(self):
# dict structure {"column_name": ["group1", "group2"]}
subset_column = list(self.subset.keys())[0]
groups = self.subset.get(subset_column)
self.metadata = self.dataset.metadata[
self.dataset.metadata[subset_column].isin(groups)
self.metadata = self.metadata_ori[
self.metadata_ori[subset_column].isin(groups)
][columns_to_keep]

else:
self.metadata = self.dataset.metadata[columns_to_keep]
self.metadata = self.metadata_ori[columns_to_keep]

def _check_covariat_input(self):
# check whether covariates in metadata column
misc_covariates = list(
set(self.covariates) - set(self.dataset.metadata.columns.to_list())
set(self.covariates) - set(self.metadata_ori.columns.to_list())
)
if len(misc_covariates) > 0:
warnings.warn(f"Covariates: {misc_covariates} are not found in Metadata.")
self.covariates = [x for x in self.covariates if x not in misc_covariates]

def _check_na_values(self):
for covariate in self.covariates:
if self.dataset.metadata[covariate].isna().any():
if self.metadata_ori[covariate].isna().any():
self.covariates.remove(covariate)
warnings.warn(
f"Covariate: {covariate} contains missing values in metadata and will not be used for analysis."
Expand Down Expand Up @@ -98,7 +105,7 @@ def _convert_string_to_binary(self):
self.covariates.remove(col)

def _prepare_matrix(self):
transposed = self.dataset.mat.transpose()
transposed = self.mat.transpose()
transposed[Cols.INDEX] = transposed.index
transposed = transposed.reset_index(drop=True)
self.transposed = transposed[self.metadata[Cols.SAMPLE].to_list()]
Expand Down Expand Up @@ -134,7 +141,7 @@ def calculate(self):
fdr=self.fdr,
s0=self.s0,
)
res[Cols.INDEX] = self.dataset.mat.columns.to_list()
res[Cols.INDEX] = self.mat.columns.to_list()
plot_list = []

if self.plot:
Expand Down
2 changes: 0 additions & 2 deletions tests/test_DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -759,8 +759,6 @@ def test_batch_correction(self):
first_value = self.obj.mat.values[0, 0]
self.assertTrue(np.isclose(150490495.32554176, first_value))

# TODO this opens a plot in a browser window
@skip # TODO multicova_analysis is unused
def test_multicova_analysis_invalid_covariates(self):
self.obj.preprocess(imputation="knn", normalization="zscore", subset=True)
res, _ = self.obj.multicova_analysis(
Expand Down

0 comments on commit 670ec10

Please sign in to comment.