diff --git a/alphastats/dataset/dataset.py b/alphastats/dataset/dataset.py index 0b2f4e4b..8152abb3 100644 --- a/alphastats/dataset/dataset.py +++ b/alphastats/dataset/dataset.py @@ -158,9 +158,19 @@ def _create_id_dicts(self, sep: str = ";") -> Tuple[dict, dict, dict]: protein_to_features_map = defaultdict(lambda: []) feature_to_repr_map = defaultdict(lambda x: x) + for proteins, feature in self.rawinput[[Cols.INDEX, Cols.INDEX]].itertuples( + index=False + ): + if feature not in features: + continue + # TODO: Shorten list if too many ids e.g. to id1;...(19) if 20 ids are present + feature_to_repr_map[feature] = "ids:" + proteins + for protein in proteins.split(sep): + protein_to_features_map[protein].append(feature) + if Cols.GENE_NAMES in self.rawinput.columns: - for genes, proteins, feature in self.rawinput[ - [Cols.GENE_NAMES, Cols.INDEX, Cols.INDEX] + for genes, feature in self.rawinput[ + [Cols.GENE_NAMES, Cols.INDEX] ].itertuples(index=False): if feature not in features: continue @@ -168,21 +178,6 @@ def _create_id_dicts(self, sep: str = ";") -> Tuple[dict, dict, dict]: for gene in genes.split(sep): gene_to_features_map[gene].append(feature) feature_to_repr_map[feature] = genes - else: - # TODO: Shorten list if too many ids e.g. to id1;...(19) if 20 ids are present - feature_to_repr_map[feature] = "ids:" + proteins - for protein in proteins.split(sep): - protein_to_features_map[protein].append(feature) - else: - for proteins, feature in self.rawinput[[Cols.INDEX, Cols.INDEX]].itertuples( - index=False - ): - if feature not in features: - continue - # TODO: Shorten list if too many ids e.g. to id1;...(19) if 20 ids are present - feature_to_repr_map[feature] = "ids:" + proteins - for protein in proteins.split(sep): - protein_to_features_map[protein].append(feature) return gene_to_features_map, protein_to_features_map, feature_to_repr_map