Skip to content

Commit

Permalink
Merge pull request #32 from WangHong007/master
Browse files (browse the repository at this point in the history)
Get protein accessions from multiple databases
  • Loading branch information
ypriverol authored Nov 20, 2023
2 parents 10249dc + 736b6d1 commit 5b3e0d5
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 13 deletions.
18 changes: 11 additions & 7 deletions bin/compute_ibaq.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,17 @@ def normalize_ibaq(res: DataFrame) -> DataFrame:
return res


def get_accession(identifier: str) -> str:
    """
    Get protein accession from the identifier (e.g. sp|P12345|PROT_NAME)

    The identifier is split on "|". When it contains no "|" separator the
    whole identifier is returned unchanged; otherwise the second field is
    returned. This avoids the IndexError that an unconditional
    ``identifier.split("|")[1]`` raises for identifiers without a separator.

    :param identifier: Protein identifier
    :return: Protein accession
    """
    identifier_lst = identifier.split("|")
    # A single field means there was no "|" in the identifier at all, so
    # the identifier itself is used as the accession.
    if len(identifier_lst) == 1:
        return identifier_lst[0]
    return identifier_lst[1]


@click.command()
Expand Down Expand Up @@ -144,8 +148,8 @@ def get_average_nr_peptides_unique_bygroup(pdrow: Series) -> Series:
digest = list() # type: list[str]
digestor.digest(AASequence().fromString(entry.sequence), digest, min_aa, max_aa)
digestuniq = set(digest)
# TODO: We keep uniprot accessions rather than names.
protein_name = parse_uniprot_accession(entry.identifier)
# TODO: Try to get protein accessions from multiple databases.
protein_name = get_accession(entry.identifier)
uniquepepcounts[protein_name] = len(digestuniq)
protein_accessions.append(protein_name)

Expand Down
8 changes: 2 additions & 6 deletions ibaq/ibaqpy_commons.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,9 +341,7 @@ def get_spectrum_prefix(reference_spectrum: str) -> str:
return re.split(r"\.mzML|\.MZML|\.raw|\.RAW|\.d|\.wiff", reference_spectrum)[0]


"""
Common functions when normalizing peptide dataframe
"""
# Common functions when normalizing peptide dataframe
def get_peptidoform_normalize_intensities(
dataset: DataFrame, higher_intensity: bool = True
) -> DataFrame:
Expand Down Expand Up @@ -436,9 +434,7 @@ def best_probability_error_bestsearch_engine(probability: float) -> float:
return 1 - probability


"""
Functions needed by Combiner
"""
# Functions needed by Combiner
def load_sdrf(sdrf_path: str) -> DataFrame:
"""
Load sdrf TSV as a dataframe.
Expand Down

0 comments on commit 5b3e0d5

Please sign in to comment.