diff --git a/bin/compute_ibaq.py b/bin/compute_ibaq.py
index e742fa0..965d3fe 100644
--- a/bin/compute_ibaq.py
+++ b/bin/compute_ibaq.py
@@ -43,13 +43,17 @@ def normalize_ibaq(res: DataFrame) -> DataFrame:
     return res
 
 
-def parse_uniprot_accession(identifier: str) -> str:
+def get_accession(identifier: str) -> str:
     """
-    Parse the uniprot accession from the identifier (e.g. sp|P12345|PROT_NAME)
-    :param identifier: Uniprot identifier
-    :return:
+    Get protein accession from the identifier (e.g. sp|P12345|PROT_NAME)
+    :param identifier: Protein identifier
+    :return: Protein accession
     """
-    return identifier.split("|")[1]
+    identifier_lst = identifier.split("|")
+    if len(identifier_lst) == 1:
+        return identifier_lst[0]
+    else:
+        return identifier_lst[1]
 
 
 @click.command()
@@ -144,8 +148,8 @@ def get_average_nr_peptides_unique_bygroup(pdrow: Series) -> Series:
         digest = list()  # type: list[str]
         digestor.digest(AASequence().fromString(entry.sequence), digest, min_aa, max_aa)
         digestuniq = set(digest)
-        # TODO: We keep uniprot accessions rather than names.
-        protein_name = parse_uniprot_accession(entry.identifier)
+        # TODO: Try to get protein accessions from multiple databases.
+        protein_name = get_accession(entry.identifier)
         uniquepepcounts[protein_name] = len(digestuniq)
         protein_accessions.append(protein_name)
 
diff --git a/ibaq/ibaqpy_commons.py b/ibaq/ibaqpy_commons.py
index 69ce63c..ec001a3 100644
--- a/ibaq/ibaqpy_commons.py
+++ b/ibaq/ibaqpy_commons.py
@@ -341,9 +341,7 @@ def get_spectrum_prefix(reference_spectrum: str) -> str:
     return re.split(r"\.mzML|\.MZML|\.raw|\.RAW|\.d|\.wiff", reference_spectrum)[0]
 
 
-"""
-Common functions when normalizing peptide dataframe
-"""
+# Common functions when normalizing peptide dataframe
 def get_peptidoform_normalize_intensities(
     dataset: DataFrame, higher_intensity: bool = True
 ) -> DataFrame:
@@ -436,9 +434,7 @@ def best_probability_error_bestsearch_engine(probability: float) -> float:
     return 1 - probability
 
 
-"""
-Functions needed by Combiner
-"""
+# Functions needed by Combiner
 def load_sdrf(sdrf_path: str) -> DataFrame:
     """
     Load sdrf TSV as a dataframe.
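
Note for reviewers: a minimal standalone sketch of how the renamed helper is expected to behave under this patch. The first identifier comes from the docstring example; the second (ENSP00000354587) is illustrative only and not taken from the patch.

def get_accession(identifier: str) -> str:
    # Copied from the patch above: return the accession between the pipes
    # for UniProt-style identifiers, otherwise return the identifier as-is.
    identifier_lst = identifier.split("|")
    if len(identifier_lst) == 1:
        return identifier_lst[0]
    else:
        return identifier_lst[1]


# UniProt-style identifier: accession is the second pipe-delimited field.
assert get_accession("sp|P12345|PROT_NAME") == "P12345"
# Identifier without pipes (hypothetical non-UniProt accession): returned unchanged,
# where the old parse_uniprot_accession would have raised an IndexError.
assert get_accession("ENSP00000354587") == "ENSP00000354587"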