Skip to content

Commit

Permalink
remove non-breaking spaces
Browse files Browse the repository at this point in the history
  • Loading branch information
mschwoer committed Oct 18, 2024
1 parent 8d618cc commit 190496d
Show file tree
Hide file tree
Showing 17 changed files with 39 additions and 39 deletions.
2 changes: 1 addition & 1 deletion HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
# 0.5.3
* FIX FragPipe loading issue

# 0.5.2
# 0.5.2
* FIX FragPipe import #173

# 0.5.1
Expand Down
2 changes: 1 addition & 1 deletion alphastats/DataSet_Preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def _filter(self):
logging.info("Contaminatons have already been filtered.")
return

#  print column names with contamination
# print column names with contamination
protein_groups_to_remove = self.rawinput[
self.rawinput[self.filter_columns].any(axis=1)
][self.index_column].tolist()
Expand Down
2 changes: 1 addition & 1 deletion alphastats/gui/AlphaPeptStats.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,4 @@
# https://discuss.streamlit.io/t/icons-for-the-multi-app-page-menu-in-the-sidebar-other-than-emojis/27222
# https://icons.getbootstrap.com/
# https://medium.com/codex/create-a-multi-page-app-with-the-new-streamlit-option-menu-component-3e3edaf7e7ad
#  https://lightrun.com/answers/streamlit-streamlit-set-multipage-app-emoji-in-stpage_config-not-filename
# https://lightrun.com/answers/streamlit-streamlit-set-multipage-app-emoji-in-stpage_config-not-filename
2 changes: 1 addition & 1 deletion alphastats/loader/AlphaPeptLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def __init__(
self._add_contamination_reverse_column()
self._add_contamination_column()
self._read_all_column_names_as_string()
#  make ProteinGroup column
# make ProteinGroup column
self.rawinput["ProteinGroup"] = self.rawinput[self.index_column].map(
self.standardize_protein_group_column
)
Expand Down
4 changes: 2 additions & 2 deletions alphastats/loader/BaseLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,13 @@ def _check_if_file_exists(self, file):
raise OSError(f"{file} does not exist.")

def _add_contamination_column(self):
#  load df with potential contamination from fasta file
# load df with potential contamination from fasta file
contaminations_path = (
importlib_resources.files(__package__) / "../data/contaminations.txt"
)
contaminations = pd.read_csv(contaminations_path, sep="\t")
contaminations_ids = contaminations["Uniprot ID"].to_list()
#  add column with True False
# add column with True False

self.rawinput["contamination_library"] = np.where(
self.rawinput[self.index_column].isin(contaminations_ids), True, False
Expand Down
4 changes: 2 additions & 2 deletions alphastats/loader/FragPipeLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ def __init__(
"""
# https://github.com/Nesvilab/MSFragger/wiki/Interpreting-MSFragger-Output

#  SAMPLE
# SAMPLE
# column Spectrum

# https://fragpipe.nesvilab.org/docs/tutorial_fragpipe_outputs.html#combined_proteintsv
# Frag pipe
# https://github.com/Nesvilab/philosopher/wiki/Combined-protein-reports
#  ProteinProphet: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5863791/
# ProteinProphet: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5863791/
2 changes: 1 addition & 1 deletion alphastats/loader/GenericLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def _extract_sample_names(self, metadata: pd.DataFrame, sample_column: str):
def load_file(self, file_path):
if isinstance(file_path, pd.DataFrame):
df = file_path
#  loading file needs to be more beautiful
# loading file needs to be more beautiful
elif file_path.endswith(".xlsx"):
df = pd.read_excel(file_path)
# find robust way to detect file format
Expand Down
2 changes: 1 addition & 1 deletion alphastats/plots/DimensionalityReduction.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def _plot(self, sample_names, group_color):
fig_dict["data"][count]["hovertemplate"] = hover
fig = go.Figure(fig_dict)

#  save plotting data in figure object
# save plotting data in figure object
fig = plotly_object(fig)
self._update_figure_attributes(
fig,
Expand Down
4 changes: 2 additions & 2 deletions alphastats/plots/IntensityPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def _add_significance(plot):

group1, group2 = data[0]["name"], data[1]["name"]
y_array1, y_array2 = data[0]["y"], data[1]["y"]
#  do ttest
# do ttest
pvalue = scipy.stats.ttest_ind(y_array1, y_array2).pvalue

pvalue_text = "<i>p=" + str(round(pvalue, 4)) + "</i>"
Expand Down Expand Up @@ -136,7 +136,7 @@ def _add_significance(plot):
return plot

def _prepare_data(self):
#  TODO use difflib to find similar ProteinId if ProteinGroup is not present
# TODO use difflib to find similar ProteinId if ProteinGroup is not present
df = (
self.mat[[self.protein_id]]
.reset_index()
Expand Down
10 changes: 5 additions & 5 deletions alphastats/plots/VolcanoPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ def _anova(self):
mat_transpose = self.mat.transpose()
fc = self._calculate_foldchange(mat_transpose, group1_samples, group2_samples)

#  check how column is ordered
# check how column is ordered
self.pvalue_column = self.group1 + " vs. " + self.group2 + " Tukey Test"

if self.pvalue_column not in result_df.columns:
Expand Down Expand Up @@ -402,7 +402,7 @@ def get_colored_labels(self):
self.res["label"] = np.where(
self.res.color != "non_sig", self.res[label_column], ""
)
#  replace nas with empty string (can cause error when plotting with gene names)
# replace nas with empty string (can cause error when plotting with gene names)
self.res["label"] = self.res["label"].fillna("")
self.res = self.res[self.res["label"] != ""]
if "color" not in self.res.columns:
Expand All @@ -427,7 +427,7 @@ def get_colored_labels_df(self):
self.res["label"] = np.where(
self.res.color != "non_sig", self.res[label_column], ""
)
#  replace nas with empty string (can cause error when plotting with gene names)
# replace nas with empty string (can cause error when plotting with gene names)
self.res["label"] = self.res["label"].fillna("")
self.res = self.res[self.res["label"] != ""]
if "color" not in self.res.columns:
Expand All @@ -447,7 +447,7 @@ def _add_labels_plot(self):
self.res["label"] = np.where(
self.res.color != "non_sig", self.res[label_column], ""
)
#  replace nas with empty string (can cause error when plotting with gene names)
# replace nas with empty string (can cause error when plotting with gene names)
self.res["label"] = self.res["label"].fillna("")
self.res["label"] = [
";".join([i for i in j.split(";") if i]) for j in self.res["label"].tolist()
Expand Down Expand Up @@ -536,7 +536,7 @@ def _plot(self):
self.plot.update_layout(showlegend=False)
self.plot.update_layout(width=600, height=700)

#  save plotting data in figure object
# save plotting data in figure object
self.plot = plotly_object(self.plot)
self._update_figure_attributes(
self.plot,
Expand Down
4 changes: 2 additions & 2 deletions alphastats/statistics/Anova.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def _perform_anova(self) -> pd.DataFrame:
return anova_df

def _prepare_data(self):
#  generated list of list with samples
# generated list of list with samples
subgroup = self.metadata[self.column].unique().tolist()
self.all_groups = []
for sub in subgroup:
Expand All @@ -66,7 +66,7 @@ def _prepare_data(self):
self.mat_transpose = self.mat[self.protein_ids_list].transpose()

def _create_tukey_df(self, anova_df: pd.DataFrame) -> pd.DataFrame:
#  combine tukey results with anova results
# combine tukey results with anova results
df = (
self.mat[self.protein_ids_list]
.reset_index()
Expand Down
2 changes: 1 addition & 1 deletion alphastats/statistics/DifferentialExpressionAnalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def _prepare_anndata(self):
reduced_matrix = reduced_matrix.loc[:, (reduced_matrix != 0).any(axis=0)]
# sort metadata according to matrix values
list_to_sort = reduced_matrix.index.to_list()
#  reduce metadata
# reduce metadata
obs_metadata = (
self.metadata[self.metadata[self.sample].isin(group_samples)]
.set_index(self.sample)
Expand Down
2 changes: 1 addition & 1 deletion nbs/getting_started.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,7 @@
"id": "b8244053-e920-447d-a405-7049507733a9",
"metadata": {},
"source": [
"## 3. Preprocess"
"## 3. Preprocess"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions nbs/liu_2019.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"id": "6b8d1145-74ff-40ef-914c-9a3fb21822d6",
"metadata": {},
"source": [
"# Plasma proteome profiling discovers novel proteins associated with non-alcoholic fatty liver disease\n",
"# Plasma proteome profiling discovers novel proteins associated with non-alcoholic fatty liver disease\n",
"Liu et al. 2019\n",
"\n",
"In this notebook we want to reproduce the results from [Liu et al](https://doi.org/10.15252/msb.20188793). The aim is to identify proteins associated with non-alcoholic fatty liver disease (NAFLD)."
Expand Down Expand Up @@ -230,7 +230,7 @@
"id": "9bcd5cda-0c9c-4e1a-9bcf-918a67cbbb35",
"metadata": {},
"source": [
"## Plot Intensity"
"## Plot Intensity"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion nbs/ramus_2016.ipynb

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions tests/test_DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def test_check_loader_no_error(self):
# nothing raised -> ok

def test_check_loader_error_invalid_column(self):
#  invalid index column
# invalid index column
with self.assertRaises(ValueError):
self.loader.index_column = 100
self.obj._check_loader(loader=self.loader)
Expand All @@ -57,13 +57,13 @@ def test_check_loader_error_empty_df(self):
self.obj._check_loader(loader=self.loader)

def test_check_loader_error_invalid_loader(self):
#  invalid loader, class
# invalid loader, class
with self.assertRaises(LoaderError):
df = pd.DataFrame()
self.obj._check_loader(loader=df)

def test_load_metadata(self):
#  is dataframe loaded
# is dataframe loaded
self.assertIsInstance(self.obj.metadata, pd.DataFrame)
self.assertFalse(self.obj.metadata.empty)

Expand All @@ -83,7 +83,7 @@ def test_load_metadata_warning(self, mock):
mock.assert_called_once()

def test_create_matrix(self):
#  matrix dimensions
# matrix dimensions
self.assertEqual(self.obj.mat.shape, self.matrix_dim)
# does the matrix only contain floats/integers and NAs
is_dtype_numeric = list(
Expand All @@ -109,7 +109,7 @@ def test_preprocess_filter(self, mock):
# is the new matrix smaller than the older matrix
self.obj.preprocess(remove_contaminations=True)
self.assertEqual(self.obj.mat.shape, self.matrix_dim_filtered)
#  info has been printed at least once
# info has been printed at least once
mock.assert_called_once()

@patch("logging.Logger.info")
Expand Down Expand Up @@ -176,7 +176,7 @@ def test_plot_sampledistribution(self):
plot_dict = plot.to_plotly_json()
# check if plotly object is not empty
self.assertEqual(len(plot_dict.get("data")), 1)
#  check if it is logscale
# check if it is logscale
self.assertEqual(plot_dict.get("layout").get("yaxis").get("type"), "log")

def test_reset_preprocessing(self):
Expand All @@ -190,7 +190,7 @@ def test_reset_preprocessing(self):


class TestAlphaPeptDataSet(BaseTestDataSet.BaseTest):
#  do testing which requires extra files only on TestAlphaPeptDataSet
# do testing which requires extra files only on TestAlphaPeptDataSet
# to reduce the amount of comparison files required
def setUp(self):
self.loader = AlphaPeptLoader(file="testfiles/alphapept/results_proteins.csv")
Expand All @@ -203,7 +203,7 @@ def setUp(self):
# expected dimensions of matrix
self.matrix_dim = (2, 3781)
self.matrix_dim_filtered = (2, 3707)
#  metadata column to compare for PCA, t-test, etc.
# metadata column to compare for PCA, t-test, etc.
self.comparison_column = "disease"

def test_dataset_without_metadata(self):
Expand Down Expand Up @@ -235,7 +235,7 @@ def test_remove_misc_samples_in_metadata(self, mock):
metadata_path_or_df=df,
sample_column="sample",
)
#  is sample C removed
# is sample C removed
self.assertEqual(self.obj.metadata.shape, (2, 2))
mock.assert_called_once()

Expand Down Expand Up @@ -358,11 +358,11 @@ def test_plot_sampledistribution_group(self):
self.assertIsInstance(plot, plotly.graph_objects.Figure)
# convert plotly object to dict
plot_dict = plot.to_plotly_json()
#  check if it doesnt get transformed to logscale
# check if it doesnt get transformed to logscale
self.assertEqual(plot_dict.get("layout").get("yaxis").get("type"), None)
# check if there are two groups control and disease
self.assertEqual(plot_dict.get("data")[0].get("legendgroup"), "control")
#  check that it is boxplot and not violinplot
# check that it is boxplot and not violinplot
is_boxplot = "boxmode" in plot_dict.get("layout")
self.assertTrue(is_boxplot)

Expand Down Expand Up @@ -820,7 +820,7 @@ def test_plot_intensity_box(self):
protein_id="A0A075B6H7", group="grouping1", method="box", log_scale=True
)
plot_dict = plot.to_plotly_json()
#  log scale
# log scale
self.assertEqual(plot_dict.get("layout").get("yaxis").get("type"), "log")
is_boxplot = "boxmode" in plot_dict.get("layout")
self.assertTrue(is_boxplot)
Expand Down
6 changes: 3 additions & 3 deletions tests/test_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@


class BaseTestLoader:
#  parent class of test loader for common tests among loaders
# parent class of test loader for common tests among loaders
# this is wrapped in a nested class so it doesn't get called separately when testing
# plus to avoid multiple inheritance
class BaseTest(unittest.TestCase):
Expand All @@ -43,7 +43,7 @@ def test_check_if_columns_are_present_no_error(self):

@patch("logging.Logger.warning")
def test_check_if_indexcolumn_is_unique_warning(self, mock):
#  check if indexcolumn is unique
# check if indexcolumn is unique
# check if error gets raised when duplicate
obj = copy.deepcopy(self.obj)
obj.rawinput[obj.index_column] = "non unique"
Expand All @@ -52,7 +52,7 @@ def test_check_if_indexcolumn_is_unique_warning(self, mock):

# @patch("logging.Logger.warning")
# def test_check_if_indexcolumn_is_unique_no_warning(self,mock):
#  check if indexcolumn is unique
# check if indexcolumn is unique
# self.obj.check_if_indexcolumn_is_unique()
# mock.assert_not_called()

Expand Down

0 comments on commit 190496d

Please sign in to comment.