Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable multicova #347

Merged
merged 10 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,16 @@
}
],
"results": {
".github/workflows/create_release.yml": [
{
"type": "Secret Keyword",
"filename": ".github/workflows/create_release.yml",
"hashed_secret": "3e26d6750975d678acb8fa35a0f69237881576b0",
"is_verified": false,
"line_number": 15,
"is_secret": false
}
],
"docs/workflow_mq.html": [
{
"type": "Base64 High Entropy String",
Expand All @@ -150,5 +160,5 @@
}
]
},
"generated_at": "2024-09-18T09:54:14Z"
"generated_at": "2024-10-08T15:30:41Z"
}
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,13 @@ You can run the checks yourself using:
pre-commit run --all-files
```

##### The `detect-secrets` hook fails
This happens when code you added is flagged as a potential secret.
1. Run `detect-secrets scan --exclude-files testfiles --exclude-lines '"(hash|id|image/\w+)":.*' > .secrets.baseline`
(check `.pre-commit-config.yaml` for the exact parameters)
2. Run `detect-secrets audit .secrets.baseline` and check whether each detected 'secret' is actually a secret
3. Commit the updated `.secrets.baseline`



---
Expand Down
16 changes: 15 additions & 1 deletion alphastats/DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,8 @@ def _get_init_dataset(

return rawmat, mat, metadata, preprocessing_info

def _check_loader(self, loader):
@staticmethod
def _check_loader(loader):
"""Checks if the Loader is from class AlphaPeptLoader, MaxQuantLoader, DIANNLoader, FragPipeLoader

Args:
Expand Down Expand Up @@ -252,6 +253,19 @@ def ancova(
"""A wrapper for Statistics.ancova(), see documentation there."""
return self._get_statistics().ancova(protein_id, covar, between)

def multicova_analysis(
    self,
    covariates: list,
    n_permutations: int = 3,
    fdr: float = 0.05,
    s0: float = 0.05,
    subset: Optional[dict] = None,
) -> Tuple[pd.DataFrame, list]:
    """A wrapper for Statistics.multicova_analysis(), see documentation there.

    Args:
        covariates (list): list of covariates, forwarded unchanged.
        n_permutations (int, optional): number of permutations. Defaults to 3.
        fdr (float, optional): False Discovery Rate. Defaults to 0.05.
        s0 (float, optional): forwarded unchanged. Defaults to 0.05.
        subset (dict, optional): forwarded unchanged. Defaults to None.

    Returns:
        Tuple[pd.DataFrame, list]: whatever Statistics.multicova_analysis() returns.
    """
    # Forward by keyword: several arguments share the same type, so a positional
    # pass-through would silently mix them up if either signature were reordered.
    return self._get_statistics().multicova_analysis(
        covariates=covariates,
        n_permutations=n_permutations,
        fdr=fdr,
        s0=s0,
        subset=subset,
    )

@check_for_missing_values
def plot_pca(self, group: Optional[str] = None, circle: bool = False):
"""Plot Principal Component Analysis (PCA)
Expand Down
70 changes: 36 additions & 34 deletions alphastats/DataSet_Statistics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from functools import lru_cache
from typing import Dict, Union
from typing import Dict, Tuple, Union

import pandas as pd
import pingouin
Expand All @@ -9,6 +9,7 @@
from alphastats.statistics.DifferentialExpressionAnalysis import (
DifferentialExpressionAnalysis,
)
from alphastats.statistics.MultiCovaAnalysis import MultiCovaAnalysis
from alphastats.utils import ignore_warning


Expand Down Expand Up @@ -121,36 +122,37 @@ def ancova(
ancova_df = pingouin.ancova(df, dv=protein_id, covar=covar, between=between)
return ancova_df

# @ignore_warning(RuntimeWarning)
# def multicova_analysis( # TODO never used outside of tests .. how does this relate to multicova.py?
# self,
# covariates: list,
# n_permutations: int = 3,
# fdr: float = 0.05,
# s0: float = 0.05,
# subset: dict = None,
# ) -> Union[pd.DataFrame, list]:
# """Perform Multicovariat Analysis
# will return a pandas DataFrame with the results and a list of volcano plots (for each covariat)
#
# Args:
# covariates (list): list of covariates, column names in metadata
# n_permutations (int, optional): number of permutations. Defaults to 3.
# fdr (float, optional): False Discovery Rate. Defaults to 0.05.
# s0 (float, optional): . Defaults to 0.05.
# subset (dict, optional): for categorical covariates . Defaults to None.
#
# Returns:
# pd.DataFrame: Multicova Analysis results
# """
#
# res, plot_list = MultiCovaAnalysis(
# dataset=self, # TODO fix .. does this write to it?
# covariates=covariates,
# n_permutations=n_permutations,
# fdr=fdr,
# s0=s0,
# subset=subset,
# plot=True,
# ).calculate()
# return res, plot_list
@ignore_warning(RuntimeWarning)
def multicova_analysis(  # TODO never used outside of tests .. how does this relate to multicova.py?
    self,
    covariates: list,
    n_permutations: int = 3,
    fdr: float = 0.05,
    s0: float = 0.05,
    subset: dict = None,
) -> Tuple[pd.DataFrame, list]:
    """Perform multicovariate analysis.

    Builds a MultiCovaAnalysis from this object's ``mat`` and ``metadata`` and
    returns the result of its ``calculate()`` call: a pandas DataFrame with the
    results and a list of volcano plots (one per covariate).

    Args:
        covariates (list): list of covariates, column names in metadata
        n_permutations (int, optional): number of permutations. Defaults to 3.
        fdr (float, optional): False Discovery Rate. Defaults to 0.05.
        s0 (float, optional): passed through unchanged to MultiCovaAnalysis. Defaults to 0.05.
        subset (dict, optional): for categorical covariates. Defaults to None.

    Returns:
        Tuple[pd.DataFrame, list]: multicova analysis results and the list of plots.
            NOTE(review): ``plot`` is not passed to MultiCovaAnalysis here, so the
            plot list is presumably empty -- confirm against MultiCovaAnalysis.calculate().
    """

    res, plot_list = MultiCovaAnalysis(
        mat=self.mat,
        metadata=self.metadata,
        covariates=covariates,
        n_permutations=n_permutations,
        fdr=fdr,
        s0=s0,
        subset=subset,
    ).calculate()

    return res, plot_list
11 changes: 8 additions & 3 deletions alphastats/plots/PlotUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,14 @@ class PlotlyObject(plotly.graph_objs._figure.Figure):
class PlotUtils:
@staticmethod
def _update_colors_plotly(fig, color_dict):
# plotly doesnt allow to assign color to certain group
# update instead the figure in form of a dict
# color_dict with group_variable/legendgroup as key, and corresponding color as value
# TODO revisit this comment:
mschwoer marked this conversation as resolved.
Show resolved Hide resolved
# plotly doesn't allow assigning a color to a certain group,
# so instead update the figure in the form of a dict:
# color_dict with group_variable/legendgroup as key, and corresponding color as value
# update:
# https://plotly.com/python-api-reference/generated/generated/plotly.graph_objects.Figure.update_traces.html
# + selector to set individual color or something like:
# plot.for_each_trace(lambda t: t.update(marker_color=color_dict.get(t.legendgroup)))
fig_dict = fig.to_plotly_json()
data_dict_list = fig_dict.get("data")
for count, group in enumerate(data_dict_list):
Expand Down
25 changes: 16 additions & 9 deletions alphastats/statistics/MultiCovaAnalysis.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import warnings

import numpy as np
import pandas as pd
import plotly.express as px

from alphastats.keys import Cols
Expand All @@ -10,15 +11,21 @@
class MultiCovaAnalysis:
def __init__(
self,
dataset,
*,
mat: pd.DataFrame,
metadata: pd.DataFrame,
covariates: list,
n_permutations: int = 3,
fdr: float = 0.05,
s0: float = 0.05,
subset: dict = None,
plot: bool = False,
):
self.dataset = dataset # TODO pass only .mat, .metadata
self.metadata_ori = metadata
self.mat = mat

self.metadata = None # TODO check if the distinction between metadata and metadata_ori is necessary

self.covariates = covariates
self.n_permutations = n_permutations
self.fdr = fdr
Expand All @@ -38,25 +45,25 @@ def _subset_metadata(self):
# dict structure {"column_name": ["group1", "group2"]}
subset_column = list(self.subset.keys())[0]
groups = self.subset.get(subset_column)
self.metadata = self.dataset.metadata[
self.dataset.metadata[subset_column].isin(groups)
self.metadata = self.metadata_ori[
self.metadata_ori[subset_column].isin(groups)
][columns_to_keep]

else:
self.metadata = self.dataset.metadata[columns_to_keep]
self.metadata = self.metadata_ori[columns_to_keep]

def _check_covariat_input(self):
# check whether covariates in metadata column
misc_covariates = list(
set(self.covariates) - set(self.dataset.metadata.columns.to_list())
set(self.covariates) - set(self.metadata_ori.columns.to_list())
)
if len(misc_covariates) > 0:
warnings.warn(f"Covariates: {misc_covariates} are not found in Metadata.")
self.covariates = [x for x in self.covariates if x not in misc_covariates]

def _check_na_values(self):
for covariate in self.covariates:
if self.dataset.metadata[covariate].isna().any():
if self.metadata_ori[covariate].isna().any():
self.covariates.remove(covariate)
warnings.warn(
f"Covariate: {covariate} contains missing values in metadata and will not be used for analysis."
Expand Down Expand Up @@ -98,7 +105,7 @@ def _convert_string_to_binary(self):
self.covariates.remove(col)

def _prepare_matrix(self):
transposed = self.dataset.mat.transpose()
transposed = self.mat.transpose()
transposed[Cols.INDEX] = transposed.index
transposed = transposed.reset_index(drop=True)
self.transposed = transposed[self.metadata[Cols.SAMPLE].to_list()]
Expand Down Expand Up @@ -134,7 +141,7 @@ def calculate(self):
fdr=self.fdr,
s0=self.s0,
)
res[Cols.INDEX] = self.dataset.mat.columns.to_list()
res[Cols.INDEX] = self.mat.columns.to_list()
plot_list = []

if self.plot:
Expand Down
2 changes: 0 additions & 2 deletions tests/test_DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,8 +753,6 @@ def test_batch_correction(self):
first_value = self.obj.mat.values[0, 0]
self.assertTrue(np.isclose(2.624937690577153e-08, first_value))

# TODO this opens a plot in a browser window
@skip # TODO multicova_analysis is unused
def test_multicova_analysis_invalid_covariates(self):
self.obj.preprocess(imputation="knn", normalization="zscore", subset=True)
res, _ = self.obj.multicova_analysis(
Expand Down
Loading