Skip to content

Commit

Permalink
Merge pull request #107 from MannLabs/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
GeorgWa authored Jul 17, 2023
2 parents 5835683 + 4120205 commit 525529a
Show file tree
Hide file tree
Showing 45 changed files with 2,712 additions and 1,535 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.0.2
current_version = 1.0.3
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
Expand Down
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,21 @@

Follow the changelog format from https://keepachangelog.com/en/1.0.0/.

## 1.1.0 - 2023.xx.xx

### Added

- Separate `library_reader_base` in `psm_reader.yaml` config for `LibraryReaderBase`.

### Changed

- `mod@Any N-term` and `mod@Any_N-term` are both supported; `Any_N-term` is preferred because it contains no spaces and is therefore easier to use with command-line tools. The same applies to `mod@Protein N-term`, `mod@Any C-term`, and `mod@Protein C-term`.
- Enable customizing dtypes of peak mz and intensity values.
- Renamed `SWATHLibraryReader` to `LibraryReaderBase` in `alphabase.spectral_library.reader`.
- New `LibraryReaderBase._get_fragment_intensity` implementation which is called at the end of the parsing process in `PSMReaderBase._post_process`. This allows it to operate only on the translated column names. By default, all non-fragment columns will be grouped and part of the final output.
- `SpecLibBase.copy()` for copying spectral libraries including all attributes.
- `SpecLibBase.append()` for appending spectral libraries while maintaining the fragment index mapping.

## 1.0.2 - 2023.02.10

### Changed
Expand Down
4 changes: 2 additions & 2 deletions alphabase/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


__project__ = "alphabase"
__version__ = "1.0.2"
__version__ = "1.0.3"
__license__ = "Apache"
__description__ = "An infrastructure Python package of the AlphaX ecosystem"
__author__ = "Mann Labs"
Expand Down Expand Up @@ -39,5 +39,5 @@
"PyPi": "https://pypi.org/project/alphabase/",
}
__extra_requirements__ = {
"development": "requirements_development.txt",
"development": "extra_requirements/development.txt",
}
21 changes: 19 additions & 2 deletions alphabase/constants/_const.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,23 @@
import os
import numpy as np

from alphabase.yaml_utils import load_yaml

CONST_FILE_FOLDER = os.path.join(
os.path.dirname(__file__),
'const_files'
)
"const_files"
)

common_const_dict:dict = load_yaml(
os.path.join(CONST_FILE_FOLDER, "common_constants.yaml")
)

# Only applied in peak and fragment dataframes to save RAM.
# Using float32 still keeps 0.1 ppm precision in any value range.
# Default float dtype is "float64" for value calculation and other scenarios.
PEAK_MZ_DTYPE:np.dtype = np.dtype(
common_const_dict["PEAK_MZ_DTYPE"]
).type
PEAK_INTENSITY_DTYPE:np.dtype = np.dtype(
common_const_dict["PEAK_INTENSITY_DTYPE"]
).type
7 changes: 3 additions & 4 deletions alphabase/constants/atom.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@

from alphabase.yaml_utils import load_yaml

from alphabase.constants._const import CONST_FILE_FOLDER

common_const_dict:dict = load_yaml(
os.path.join(CONST_FILE_FOLDER, 'common_constants.yaml')
from alphabase.constants._const import (
CONST_FILE_FOLDER,
common_const_dict
)

MASS_PROTON:float = common_const_dict['MASS_PROTON']
Expand Down
6 changes: 5 additions & 1 deletion alphabase/constants/const_files/common_constants.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,8 @@ MOBILITY:
# Mason-Schamp equation of Bruker.
CCS_IM_COEF: 1059.62245
# 28 is the mass of N(2), the default gas in IM bruker
IM_GAS_MASS: 28.0
IM_GAS_MASS: 28.0

# Only applied in peak/fragment dataframes to save RAM
PEAK_MZ_DTYPE: float32
PEAK_INTENSITY_DTYPE: float32
39 changes: 38 additions & 1 deletion alphabase/constants/const_files/psm_reader.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ alphapept:
'Phospho@T': 'pT'
'Phospho@Y': 'pY'
'Acetyl@Protein N-term': 'a'

maxquant:
reader_type: maxquant
rt_unit: minute
Expand All @@ -45,6 +46,7 @@ maxquant:
'genes': ['Gene Names','Gene names']
'decoy': 'Reverse'
'intensity': 'Intensity'

modification_mapping:
'Acetyl@Protein N-term':
- '_(Acetyl (Protein N-term))'
Expand Down Expand Up @@ -74,6 +76,7 @@ maxquant:
'Deamidated@N': ['N(Deamidation (NQ))','N(de)']
'Deamidated@Q': ['Q(Deamidation (NQ))','Q(de)']
'GlyGly@K': ['K(GlyGly (K))', 'K(gl)']

pfind:
reader_type: pfind
rt_unit: minute
Expand Down Expand Up @@ -117,7 +120,9 @@ msfragger_pepxml:
- 'Glu->pyro-Glu@E^Any N-term'
- 'Gln->pyro-Glu@Q^Any N-term'
- 'Dimethyl@K' # Any N-term is not needed here as it will be inferred on the fly
- 'Methyl@E' #an example of a PTM that can be C-term
mod_mass_tol: 0.1 # Da

diann:
reader_type: diann
rt_unit: minute
Expand Down Expand Up @@ -148,6 +153,7 @@ spectronaut_report:
'uniprot_ids': 'PG.UniProtIds'
'charge': 'charge'
modification_mapping: 'maxquant'

spectronaut:
reader_type: spectronaut
rt_unit: irt
Expand All @@ -171,4 +177,35 @@ spectronaut:
'uniprot_ids': ['UniProtIds','UniProtID','UniprotId']
'genes': ['Genes','Gene','GeneName','GeneNames']
modification_mapping: 'maxquant'


library_reader_base:
reader_type: library_reader_base
rt_unit: irt
fixed_C57: False
csv_sep: "\t"
mod_seq_columns:
- 'ModifiedPeptideSequence'
- 'ModifiedPeptide'
- 'ModifiedSequence'
- 'FullUniModPeptideName'
- 'LabeledSequence'
- 'FullUniModPeptideName'
column_mapping:
'raw_name': 'ReferenceRun'
'sequence': ['PeptideSequence', 'StrippedPeptide']
'modified_sequence': ['ModifiedPeptideSequence','ModifiedPeptide']
'charge': 'PrecursorCharge'
'rt': ['RT','iRT','Tr_recalibrated','RetentionTime','NormalizedRetentionTime']
'ccs': 'CCS'
'precursor_mz': 'PrecursorMz'
'mobility': ['Mobility','IonMobility','PrecursorIonMobility']
'proteins': ['ProteinId','ProteinID','ProteinName','Protein Name',]
'uniprot_ids': ['UniProtIds','UniProtID','UniprotId']
'genes': ['GeneName','Genes','Gene',]
'fragment_intensity': ['LibraryIntensity','RelativeIntensity', 'RelativeFragmentIntensity', 'RelativeFragmentIonIntensity']
'fragment_mz': ['ProductMz']
'fragment_type': ['FragmentType', 'FragmentIonType', 'ProductType', 'ProductIonType']
'fragment_charge' : ['FragmentCharge', 'FragmentIonCharge', 'ProductCharge', 'ProductIonCharge']
'fragment_series': ['FragmentSeriesNumber','FragmentNumber']
'fragment_loss_type': ['FragmentLossType', 'FragmentIonLossType', 'ProductLossType', 'ProductIonLossType']
modification_mapping: 'maxquant'
5 changes: 5 additions & 0 deletions alphabase/constants/modification.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ def load_mod_df(
):
global MOD_DF
MOD_DF = pd.read_table(tsv)
_df = MOD_DF[MOD_DF.mod_name.str.contains(' ', regex=False)].copy()
_df["mod_name"] = MOD_DF.mod_name.str.replace(' ', '_', regex=False)
MOD_DF = pd.concat(
[MOD_DF, _df], ignore_index=True
).drop_duplicates("mod_name")
MOD_DF.fillna('',inplace=True)
MOD_DF['unimod_id'] = MOD_DF.unimod_id.astype(np.int32)
MOD_DF.set_index('mod_name', drop=False, inplace=True)
Expand Down
Loading

0 comments on commit 525529a

Please sign in to comment.