Skip to content

Commit

Permalink
Add quick fixes and todos addressing comments on #378
Browse files Browse the repository at this point in the history
  • Loading branch information
JuliaS92 committed Nov 25, 2024
1 parent e9f228a commit adb223e
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 6 deletions.
30 changes: 25 additions & 5 deletions alphastats/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,12 +151,31 @@ def _check_loader(loader):
)

def _create_id_dicts(self, sep: str = ";") -> Tuple[dict, dict, dict]:
"""Create mapprings from gene, protein to feature and from feature to repr."""
"""
Create mappings from gene and protein to feature, and from feature to representation.
Features are the entities measured in each sample, usually protein groups represented by semicolon-separated protein IDs.
The mappings maintain the many-to-many relationships between the three entities: feature, protein and gene.
This method processes the raw input data to generate three dictionaries:
1. gene_to_features_map: Maps each gene to a list of features.
2. protein_to_features_map: Maps each protein to a list of features.
3. feature_to_repr_map: Maps each feature to its representation string.
Args:
sep (str): The separator used to split gene and protein identifiers. Default is ";".
Returns:
Tuple[dict, dict, dict]: A tuple containing three dictionaries:
- gene_to_features_map (dict): A dictionary mapping genes to features.
- protein_to_features_map (dict): A dictionary mapping proteins to features.
- feature_to_repr_map (dict): A dictionary mapping features to their representation strings.
"""

features = self.mat.columns.to_list()
gene_to_features_map = defaultdict(lambda: [])
protein_to_features_map = defaultdict(lambda: [])
feature_to_repr_map = defaultdict(lambda x: x)
gene_to_features_map = defaultdict(list)
protein_to_features_map = defaultdict(list)
feature_to_repr_map = {}
# TODO: Make sure both iterators are with zip after merging branches.

for proteins, feature in self.rawinput[[Cols.INDEX, Cols.INDEX]].itertuples(
index=False
Expand Down Expand Up @@ -469,7 +488,8 @@ def _get_features_for_gene_name(
self,
gene_name: str,
) -> list:
"""Get protein groups from gene id. If gene id is not present, return gene id, as we might already have a gene id.
# TODO: This should raise an error and not return the gene name if it is not actually in the data.
"""Get feature from gene name. If gene name is not present, return gene name, as we might already have a gene id.
'HEL114' -> ['P18206;A0A024QZN4;V9HWK2;B3KXA2;Q5JQ13;B4DKC9;B4DTM7;A0A096LPE1']
Args:
Expand Down
2 changes: 2 additions & 0 deletions alphastats/loader/maxquant_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,13 @@ def __init__(
self._set_filter_columns_to_true_false()
self._read_all_column_names_as_string()

# TODO externalize to a method
intensity_columns = [
col
for col in self.rawinput.columns
if intensity_column.replace("[sample]", "") in col
]
# TODO: explain why we do this
if len(self.rawinput.dropna(subset=intensity_columns, how="all")) != len(
self.rawinput
):
Expand Down
2 changes: 1 addition & 1 deletion alphastats/plots/intensity_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def __init__(
self.method = method
self.add_significance = add_significance
self.log_scale = log_scale
# TODO: rename y_axis to make clear this is not a name from the original data
self.y_axis = self.intensity_column.replace("[sample]", "").strip()
if self.preprocessing_info[PreprocessingStateKeys.LOG2_TRANSFORMED]:
self.y_axis = "log2(" + self.y_axis + ")"
Expand Down Expand Up @@ -139,7 +140,6 @@ def _add_significance(plot):
return plot

def _prepare_data(self):
# TODO use difflib to find similar ProteinId if ProteinGroup is not present
df = (
self.mat[self.protein_id].melt(
ignore_index=False,
Expand Down
1 change: 1 addition & 0 deletions tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1061,6 +1061,7 @@ def test_preprocess_replace_zero(self):
)

def test_create_id_mapping(self):
# TODO: Test the actual dicts.
"""Test id maps"""
self.assertEqual(len(self.obj._gene_to_features_map), 21)
self.assertEqual(len(self.obj._gene_to_features_map["G14"]), 2)
Expand Down

0 comments on commit adb223e

Please sign in to comment.