Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Decouple dataset from statistics #329

Merged
merged 19 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,14 @@ jobs:
- name: Print pip freeze
run: |
pip freeze
- name: Run tests
run: |
coverage run -m pytest
- name: Upload Coverage to Codecov
uses: codecov/codecov-action@v4
- name: Run notebooks
run: |
python3 -m ipykernel install --user
# TODO add the excluded notebook
TEST_NBS=$(find ./nbs -name "*.ipynb" | grep -v "ramus_2016.ipynb")
python -m pytest --nbmake $(echo $TEST_NBS)
- name: Run tests
run: |
coverage run -m pytest
- name: Upload Coverage to Codecov
uses: codecov/codecov-action@v4
104 changes: 76 additions & 28 deletions alphastats/DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from alphastats.DataSet_Preprocess import Preprocess, PreprocessingStateKeys
from alphastats.DataSet_Statistics import Statistics
from alphastats.utils import LoaderError
from alphastats.statistics.tukey_test import tukey_test

plotly.io.templates["alphastats_colors"] = plotly.graph_objects.layout.Template(
layout=plotly.graph_objects.Layout(
Expand All @@ -35,7 +36,7 @@
plotly.io.templates.default = "simple_white+alphastats_colors"


class DataSet(Statistics, Plot):
class DataSet(Plot):
"""Analysis Object"""

def __init__(
Expand Down Expand Up @@ -100,9 +101,21 @@ def __init__(

print("DataSet has been created.")

def _get_preprocess(self) -> Preprocess:
    """Build a fresh Preprocess object from the current state of this DataSet.

    A new instance is created on every call. The constructor receives, in
    this order: filter_columns, rawinput, index_column, sample, metadata,
    preprocessing_info and mat.
    """
    constructor_args = (
        self.filter_columns,
        self.rawinput,
        self.index_column,
        self.sample,
        self.metadata,
        self.preprocessing_info,
        self.mat,
    )
    return Preprocess(*constructor_args)

def preprocess(
self,
log2_transform: bool = True,
log2_transform: bool = False,
remove_contaminations: bool = False,
subset: bool = False,
data_completeness: float = 0,
Expand All @@ -111,26 +124,18 @@ def preprocess(
remove_samples: list = None,
**kwargs,
) -> None:
"""A wrapper for the preprocess() method, see documentation in Preprocess.preprocess()."""
pp = Preprocess(
self.filter_columns,
self.rawinput,
self.index_column,
self.sample,
self.metadata,
self.preprocessing_info,
self.mat,
)

self.mat, self.metadata, self.preprocessing_info = pp.preprocess(
log2_transform,
remove_contaminations,
subset,
data_completeness,
normalization,
imputation,
remove_samples,
**kwargs,
"""A wrapper for Preprocess.preprocess(), see documentation there."""
self.mat, self.metadata, self.preprocessing_info = (
self._get_preprocess().preprocess(
log2_transform,
remove_contaminations,
subset,
data_completeness,
normalization,
imputation,
remove_samples,
**kwargs,
)
)
self.preprocessed = True

Expand All @@ -149,16 +154,59 @@ def reset_preprocessing(self):
print("All preprocessing steps are reset.")

def batch_correction(self, batch: str) -> None:
pp = Preprocess(
self.filter_columns,
self.rawinput,
"""A wrapper for Preprocess.batch_correction(), see documentation there."""
self.mat = self._get_preprocess().batch_correction(batch)

def _get_statistics(self) -> Statistics:
"""Return instance of the Statistics object."""
return Statistics(
self.mat,
self.metadata,
self.index_column,
self.sample,
self.metadata,
self.preprocessing_info,
self.mat,
)
self.mat = pp.batch_correction(batch)

def diff_expression_analysis(
    self,
    group1: Union[str, list],
    group2: Union[str, list],
    column: Union[str, None] = None,
    method: str = "ttest",
    perm: int = 10,
    fdr: float = 0.05,
) -> pd.DataFrame:
    """A wrapper for Statistics.diff_expression_analysis(), see documentation there.

    All arguments are forwarded positionally, in signature order, to a
    Statistics object obtained from self._get_statistics().

    Args:
        group1: forwarded unchanged as the 1st argument.
        group2: forwarded unchanged as the 2nd argument.
        column: forwarded unchanged as the 3rd argument; defaults to None.
        method: forwarded unchanged as the 4th argument; defaults to "ttest".
        perm: forwarded unchanged as the 5th argument; defaults to 10.
        fdr: forwarded unchanged as the 6th argument; defaults to 0.05.

    Returns:
        Whatever Statistics.diff_expression_analysis() returns.
    """
    statistics = self._get_statistics()
    return statistics.diff_expression_analysis(
        group1, group2, column, method, perm, fdr
    )

def tukey_test(self, protein_id: str, group: str) -> pd.DataFrame:
    """A wrapper for tukey_test.tukey_test(), see documentation there.

    Selects the ``protein_id`` column of ``self.mat``, turns the index into
    a column named ``self.sample``, inner-joins ``self.metadata`` on that
    column and passes the result to the module-level ``tukey_test``.

    Args:
        protein_id: column of self.mat to select; forwarded as the 2nd argument.
        group: forwarded unchanged as the 3rd argument.

    Returns:
        Whatever the module-level tukey_test() returns.
    """
    # Name the index after the sample column *before* resetting it. Renaming
    # the literal "index" column afterwards only works for an unnamed index;
    # with any other index name the merge below would fail on a missing key.
    df = self.mat[[protein_id]].rename_axis(self.sample).reset_index()
    df = df.merge(self.metadata, how="inner", on=[self.sample])

    # Inside the method body this name resolves to the imported module-level
    # function, not to this method.
    return tukey_test(
        df,
        protein_id,
        group,
        self.index_column,
    )

def anova(self, column: str, protein_ids="all", tukey: bool = True) -> pd.DataFrame:
    """Delegate to Statistics.anova(); the behaviour is documented there.

    The three arguments are forwarded positionally and unchanged to a
    Statistics object obtained from self._get_statistics().
    """
    statistics = self._get_statistics()
    result = statistics.anova(column, protein_ids, tukey)
    return result

def ancova(
    self, protein_id: str, covar: Union[str, list], between: str
) -> pd.DataFrame:
    """Delegate to Statistics.ancova(); the behaviour is documented there.

    The three arguments are forwarded positionally and unchanged to a
    Statistics object obtained from self._get_statistics().
    """
    statistics = self._get_statistics()
    result = statistics.ancova(protein_id, covar, between)
    return result

def _check_loader(self, loader):
"""Checks if the Loader is from class AlphaPeptLoader, MaxQuantLoader, DIANNLoader, FragPipeLoader
Expand Down
4 changes: 2 additions & 2 deletions alphastats/DataSet_Plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def plot_volcano(

return volcano_plot.plot

def plot_correlation_matrix(self, method: str = "pearson"):
def plot_correlation_matrix(self, method: str = "pearson"): # TODO unused
"""Plot Correlation Matrix

Args:
Expand Down Expand Up @@ -369,7 +369,7 @@ def plot_dendrogram(
)
return fig

def plot_imputed_values(self):
def plot_imputed_values(self): # not used
# get coordinates of missing values
df = self.mat
s = df.stack(dropna=False)
Expand Down
Loading
Loading