Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor volcano i #359

Merged
merged 11 commits into from
Nov 14, 2024
39 changes: 7 additions & 32 deletions alphastats/plots/VolcanoPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from alphastats.statistics.DifferentialExpressionAnalysis import (
DifferentialExpressionAnalysis,
)
from alphastats.statistics.StatisticUtils import _add_metadata_column
from alphastats.utils import ignore_warning

# TODO this is repeated and needs to go elsewhere!
Expand Down Expand Up @@ -84,15 +85,18 @@ def __init__(
self.color_list = color_list

if isinstance(group1, list) and isinstance(group2, list):
self.metadata, self.column = self._add_metadata_column(
metadata, group1, group2
self.metadata, self.column = _add_metadata_column(
metadata, sample, group1, group2
)
self.group1, self.group2 = "group1", "group2"
else:
self.metadata, self.column = metadata, column
self.group1, self.group2 = group1, group2

self._check_input()
if self.column is None:
raise ValueError(
"Column containing group1 and group2 needs to be specified"
)

self._statistics = Statistics(
mat=self.mat,
Expand All @@ -108,35 +112,6 @@ def __init__(
self._add_hover_data_columns()
self._plot()

def _add_metadata_column(
self, metadata: pd.DataFrame, group1_list: list, group2_list: list
):
# create new column in metadata with defined groups

sample_names = metadata[self.sample].to_list()
misc_samples = list(set(group1_list + group2_list) - set(sample_names))
if len(misc_samples) > 0:
raise ValueError(
f"Sample names: {misc_samples} are not described in Metadata."
)

column = "_comparison_column"
conditons = [
metadata[self.sample].isin(group1_list),
metadata[self.sample].isin(group2_list),
]
choices = ["group1", "group2"]
metadata[column] = np.select(conditons, choices, default=np.nan)

return metadata, column

def _check_input(self):
"""Check if self.column is set correctly."""
if self.column is None:
raise ValueError(
"Column containing group1 and group2 needs to be specified"
)

# TODO revisit this
def _update(self, updated_attributes):
"""
Expand Down
42 changes: 10 additions & 32 deletions alphastats/statistics/DifferentialExpressionAnalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import scipy

from alphastats.DataSet_Preprocess import PreprocessingStateKeys
from alphastats.statistics.StatisticUtils import _add_metadata_column


class DifferentialExpressionAnalysis:
Expand All @@ -15,34 +16,34 @@ def __init__(
sample: str,
index_column: str,
preprocessing_info: Dict,
# TODO move these to perform()?
group1: Union[str, list],
group2: Union[str, list],
column: str = None,
# TODO move these to perform()?
method: str = "ttest",
perm: int = 10,
fdr: float = 0.05,
):
self.mat = mat
self.metadata = metadata

self.sample = sample
self.index_column = index_column
self.preprocessing_info = preprocessing_info

self.group1 = group1
self.group2 = group2
self.column = column
JuliaS92 marked this conversation as resolved.
Show resolved Hide resolved
self.method = method
self.perm = perm
self.fdr = fdr

def _check_groups(self):
if isinstance(self.group1, list) and isinstance(self.group2, list):
self.column, self.group1, self.group2 = self._add_metadata_column(
self.group1, self.group2
self.metadata, self.column = _add_metadata_column(
metadata, sample, group1, group2
)
self.group1, self.group2 = "group1", "group2"
else:
self.metadata, self.column = metadata, column
self.group1, self.group2 = group1, group2

elif self.column is None:
if self.column is None:
raise ValueError(
"Column containing group1 and group2 needs to be specified"
)
Expand Down Expand Up @@ -81,29 +82,6 @@ def _prepare_anndata(self):
)
return anndata_data

def _add_metadata_column(self, group1_list: list, group2_list: list):
# create new column in metadata with defined groups
metadata = self.metadata

sample_names = metadata[self.sample].to_list()

misc_samples = list(set(group1_list + group2_list) - set(sample_names))
if len(misc_samples) > 0:
raise ValueError(
f"Sample names: {misc_samples} are not described in Metadata."
)

column = "_comparison_column"
conditons = [
metadata[self.sample].isin(group1_list),
metadata[self.sample].isin(group2_list),
]
choices = ["group1", "group2"]
metadata[column] = np.select(conditons, choices, default=np.nan)
self.metadata = metadata

return column, "group1", "group2"

def _sam(self) -> pd.DataFrame: # TODO duplicated? DUP1
from alphastats.multicova import multicova

Expand Down
40 changes: 17 additions & 23 deletions alphastats/statistics/StatisticUtils.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,23 @@
import numpy as np
import pandas as pd


# TODO: Check if StatisticUtils is used productively anywhere. Otherwise remove it.
class StatisticUtils:
def __init__(self) -> None:
pass
def _add_metadata_column(
metadata: pd.DataFrame, sample: str, group1_list: list, group2_list: list
):
# add a "_comparison_column" to metadata labelling each sample as "group1" or "group2" (np.nan default for samples in neither list)

def _add_metadata_column(self, group1_list: list, group2_list: list):
# create new column in metadata with defined groups
metadata = self.metadata
sample_names = metadata[sample].to_list()
misc_samples = list(set(group1_list + group2_list) - set(sample_names))
if len(misc_samples) > 0:
raise ValueError(f"Sample names: {misc_samples} are not described in Metadata.")

sample_names = metadata[self.sample].to_list()
misc_samples = list(set(group1_list + group2_list) - set(sample_names))
if len(misc_samples) > 0:
raise ValueError(
f"Sample names: {misc_samples} are not described in Metadata."
)
column = "_comparison_column"
conditons = [
metadata[sample].isin(group1_list),
metadata[sample].isin(group2_list),
]
choices = ["group1", "group2"]
metadata[column] = np.select(conditons, choices, default=np.nan)

column = "_comparison_column"
conditons = [
metadata[self.sample].isin(group1_list),
metadata[self.sample].isin(group2_list),
]
choices = ["group1", "group2"]
metadata[column] = np.select(conditons, choices, default=np.nan)
self.metadata = metadata

return column, "group1", "group2"
return metadata, column