Skip to content

Commit

Permalink
Simplify mapper at the cost of passing over data twice
Browse files: browse the repository at this point in the history
  • Loading branch information
JuliaS92 committed Nov 20, 2024
1 parent 8a53232 commit aa06c74
Showing 1 changed file with 12 additions and 17 deletions.
29 changes: 12 additions & 17 deletions alphastats/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,31 +158,26 @@ def _create_id_dicts(self, sep: str = ";") -> Tuple[dict, dict, dict]:
protein_to_features_map = defaultdict(lambda: [])
feature_to_repr_map = defaultdict(lambda x: x)

for proteins, feature in self.rawinput[[Cols.INDEX, Cols.INDEX]].itertuples(
index=False
):
if feature not in features:
continue
# TODO: Shorten list if too many ids e.g. to id1;...(19) if 20 ids are present
feature_to_repr_map[feature] = "ids:" + proteins
for protein in proteins.split(sep):
protein_to_features_map[protein].append(feature)

if Cols.GENE_NAMES in self.rawinput.columns:
for genes, proteins, feature in self.rawinput[
[Cols.GENE_NAMES, Cols.INDEX, Cols.INDEX]
for genes, feature in self.rawinput[
[Cols.GENE_NAMES, Cols.INDEX]
].itertuples(index=False):
if feature not in features:
continue
if isinstance(genes, str):
for gene in genes.split(sep):
gene_to_features_map[gene].append(feature)
feature_to_repr_map[feature] = genes
else:
# TODO: Shorten list if too many ids e.g. to id1;...(19) if 20 ids are present
feature_to_repr_map[feature] = "ids:" + proteins
for protein in proteins.split(sep):
protein_to_features_map[protein].append(feature)
else:
for proteins, feature in self.rawinput[[Cols.INDEX, Cols.INDEX]].itertuples(
index=False
):
if feature not in features:
continue
# TODO: Shorten list if too many ids e.g. to id1;...(19) if 20 ids are present
feature_to_repr_map[feature] = "ids:" + proteins
for protein in proteins.split(sep):
protein_to_features_map[protein].append(feature)

return gene_to_features_map, protein_to_features_map, feature_to_repr_map

Expand Down

0 comments on commit aa06c74

Please sign in to comment.