Merge pull request #230 from MannLabs/development

release 1.4.1
MannLabs · Nov 12, 2024 · 8cc183f · 8cc183f
2 parents a05c3ac + 469d6a6
commit 8cc183f
Show file tree

Hide file tree

Showing 43 changed files with 1,244 additions and 1,990 deletions.
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.4.0
+current_version = 1.4.1
 commit = True
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
@@ -14,19 +14,5 @@ serialize =
 [bumpversion:file:./alphabase/__init__.py]
 
 [bumpversion:file:./docs/conf.py]
-
-[bumpversion:file:./release/one_click_linux_gui/control]
-
-[bumpversion:file:./release/one_click_linux_gui/create_installer_linux.sh]
-
-[bumpversion:file:./release/one_click_macos_gui/distribution.xml]
-
-[bumpversion:file:./release/one_click_macos_gui/Info.plist]
-
-[bumpversion:file:./release/one_click_macos_gui/create_installer_macos.sh]
-
-[bumpversion:file:./release/one_click_windows_gui/create_installer_windows.sh]
-
-[bumpversion:file:./release/one_click_windows_gui/alphabase_innoinstaller.iss]
 search = {current_version}
 replace = {new_version}
diff --git a/.github/workflows/publish_and_release.yaml b/.github/workflows/publish_and_release.yaml
diff --git a/README.md b/README.md
@@ -197,6 +197,17 @@ For an even more interactive participation, check out the
 [the Contributors License Agreement](misc/CLA.md).
 
 ### Notes for developers
+
+#### Tagging of changes
+In order to have release notes automatically generated, changes need to be tagged with labels.
+The following labels are used (should be self-explanatory):
+`breaking-change`, `bug`, `enhancement`.
+
+#### Release a new version
+This package uses a shared release process defined in the
+[alphashared](https://github.com/MannLabs/alphashared) repository. Please see the instructions
+[there](https://github.com/MannLabs/alphashared/blob/reusable-release-workflow/.github/workflows/README.md#release-a-new-version).
+
 #### pre-commit hooks
 It is highly recommended to use the provided pre-commit hooks, as the CI pipeline enforces all checks therein to
 pass in order to merge a branch.

diff --git a/alphabase/__init__.py b/alphabase/__init__.py
@@ -2,7 +2,7 @@
 
 
 __project__ = "alphabase"
-__version__ = "1.4.0"
+__version__ = "1.4.1"
 __license__ = "Apache"
 __description__ = "An infrastructure Python package of the AlphaX ecosystem"
 __author__ = "Mann Labs"

diff --git a/alphabase/constants/const_files/modification.tsv b/alphabase/constants/const_files/modification.tsv
@@ -2440,7 +2440,7 @@ PhosphoCytidine@S	305.041287	305.1812	H(12)C(9)N(3)O(7)P(1)	0.0		Post-translatio
 AzidoF@F	41.001397	41.0122	H(-1)N(3)	0.0		Chemical derivative	1845		0.0
 Dimethylaminoethyl@C	71.073499	71.121	H(9)C(4)N(1)	0.0		Chemical derivative	1846		0.0
 Gluratylation@K	114.031694	114.0993	H(6)C(5)O(3)	0.0		Post-translational	1848		0.0
-hydroxyisobutyryl@K	86.036779	86.0892	H(6)C(4)O(2)	0.0		Post-translational	1849		0.0
+hydroxyisobutyryl@K	86.036779	86.0892	H(6)C(4)O(2)	0.0		Post-translational	1849	CC(C)(O)C(=O)NCCCCC(N([Xe])[Xe])C([Rn])=O	0.0
 MeMePhosphorothioate@S	107.979873	108.0993	H(5)C(2)O(1)P(1)S(1)	0.0		Chemical derivative	1868		0.0
 Cation:Fe[III]@D	52.911464	52.8212	H(-3)Fe(1)	0.0		Artefact	1870		0.0
 Cation:Fe[III]@E	52.911464	52.8212	H(-3)Fe(1)	0.0		Artefact	1870		0.0

diff --git a/alphabase/constants/const_files/psm_reader.yaml b/alphabase/constants/const_files/psm_reader.yaml
@@ -38,6 +38,7 @@ maxquant:
     'scan_num':
       - 'Scan number'
       - 'MS/MS scan number'
+      - 'MS/MS Scan Number'
       - 'Scan index'
     'raw_name': 'Raw file'
     'precursor_mz': 'm/z'
@@ -57,6 +58,8 @@ maxquant:
     'Acetyl@Protein_N-term':
       - '_(Acetyl (Protein_N-term))'
       - '_(ac)'
+    'Acetyl@K':
+      - 'K(ac)'
     'Carbamidomethyl@C':
       - 'C(Carbamidomethyl (C))'
       - 'C(Carbamidomethyl)'
@@ -68,22 +71,37 @@ maxquant:
       - 'S(Phospho (S))'
       - 'S(Phospho (ST))'
       - 'S(Phospho (STY))'
+      - 'S(Phospho (STYDH))'
       - 'S(ph)'
       - 'pS'
     'Phospho@T':
       - 'T(Phospho (T))'
       - 'T(Phospho (ST))'
       - 'T(Phospho (STY))'
+      - 'T(Phospho (STYDH))'
       - 'T(ph)'
       - 'pT'
     'Phospho@Y':
       - 'Y(Phospho (Y))'
       - 'Y(Phospho (STY))'
-      - 'Y(ph)'
+      - 'Y(Phospho (STYDH))'
       - 'pY'
+    'Phospho@D':
+      - 'D(Phospho (STYDH))'
+      - 'pD'
+    'Phospho@H':
+      - 'H(Phospho (STYDH))'
+      - 'pH'
+    'Crotonyl@K':
+      - 'K(cr)'
+    'Lactylation@K':
+      - 'K(la)'
+    'Succinyl@K':
+      - 'K(su)'
     'Deamidated@N': ['N(Deamidation (NQ))','N(de)']
     'Deamidated@Q': ['Q(Deamidation (NQ))','Q(de)']
     'GlyGly@K': ['K(GlyGly (K))', 'K(gl)']
+    'hydroxyisobutyryl@K': 'K(2-)'
 
 pfind:
   reader_type: pfind

diff --git a/alphabase/io/hdf.py b/alphabase/io/hdf.py
@@ -45,7 +45,7 @@ def editing_mode(self, read_only: bool = False, truncate: bool = True):
 
     @property
     def metadata(self):
-        with h5py.File(self.file_name, "a") as hdf_file:
+        with h5py.File(self.file_name) as hdf_file:
             return dict(hdf_file[self.name].attrs)
 
     def __init__(
@@ -162,7 +162,7 @@ def components(self):
         group_names = []
         dataset_names = []
         datafame_names = []
-        with h5py.File(self.file_name, "a") as hdf_file:
+        with h5py.File(self.file_name) as hdf_file:
             hdf_object = hdf_file[self.name]
             for name in sorted(hdf_object):
                 if isinstance(hdf_object[name], h5py.Dataset):
@@ -328,20 +328,20 @@ def __len__(self):
 
     @property
     def dtype(self):
-        with h5py.File(self.file_name, "a") as hdf_file:
+        with h5py.File(self.file_name) as hdf_file:
             return hdf_file[self.name].dtype
 
     @property
     def shape(self):
-        with h5py.File(self.file_name, "a") as hdf_file:
+        with h5py.File(self.file_name) as hdf_file:
             return hdf_file[self.name].shape
 
     @property
     def values(self):
         return self[...]
 
     def __getitem__(self, keys):
-        with h5py.File(self.file_name, "a") as hdf_file:
+        with h5py.File(self.file_name) as hdf_file:
             hdf_object = hdf_file[self.name]
             if h5py.check_string_dtype(hdf_object.dtype) is not None:
                 hdf_object = hdf_object.asstr()

diff --git a/alphabase/io/tempmmap.py b/alphabase/io/tempmmap.py
@@ -84,7 +84,7 @@ def _get_file_location(abs_file_path: str, overwrite=False) -> str:
         )
 
     # ensure that the filename conforms to the naming convention
-    if not os.path.basename.endswith(".hdf"):
+    if not os.path.basename(abs_file_path).endswith(".hdf"):
         raise ValueError("The chosen file name needs to end with .hdf")
 
     # ensure that the directory in which the file should be created exists
@@ -165,7 +165,7 @@ def array(shape: tuple, dtype: np.dtype, tmp_dir_abs_path: str = None) -> np.nda
 
     with h5py.File(temp_file_name, "w") as hdf_file:
         array = hdf_file.create_dataset("array", shape=shape, dtype=dtype)
-        array[0] = 0
+        array[0] = np.string_("") if isinstance(dtype, np.dtypes.StrDType) else 0
         offset = array.id.get_offset()
 
     with open(temp_file_name, "rb+") as raw_hdf_file:
@@ -225,7 +225,7 @@ def create_empty_mmap(
 
     with h5py.File(temp_file_name, "w") as hdf_file:
         array = hdf_file.create_dataset("array", shape=shape, dtype=dtype)
-        array[0] = 0
+        array[0] = np.string_("") if isinstance(dtype, np.dtypes.StrDType) else 0
 
     return temp_file_name
 

diff --git a/alphabase/psm_reader/maxquant_reader.py b/alphabase/psm_reader/maxquant_reader.py
@@ -1,4 +1,5 @@
 import copy
+import warnings
 
 import numba
 import numpy as np
@@ -11,6 +12,9 @@
     psm_reader_yaml,
 )
 
+# make sure all warnings are shown
+warnings.filterwarnings("always")
+
 mod_to_unimod_dict = {}
 for mod_name, unimod_id in MOD_DF[["mod_name", "unimod_id"]].values:
     unimod_id = int(unimod_id)
@@ -245,6 +249,20 @@ def _load_file(self, filename):
         self._find_mod_seq_column(df)
         df = df[~pd.isna(df["Retention time"])]
         df.fillna("", inplace=True)
+
+        # remove MBR PSMs as they are currently not supported and will crash import
+        mapped_columns = self._find_mapped_columns(df)
+        if "scan_num" in mapped_columns:
+            scan_num_col = mapped_columns["scan_num"]
+            no_ms2_mask = df[scan_num_col] == ""
+            if (num_no_ms2_mask := np.sum(no_ms2_mask)) > 0:
+                warnings.warn(
+                    f"Maxquant psm file contains {num_no_ms2_mask} MBR PSMs without MS2 scan. This is not yet supported and rows containing MBR PSMs will be removed."
+                )
+                df = df[~no_ms2_mask]
+                df.reset_index(drop=True, inplace=True)
+        df[scan_num_col] = df[scan_num_col].astype(int)
+
         # if 'K0' in df.columns:
         #     df['Mobility'] = df['K0'] # Bug in MaxQuant? It should be 1/K0
         # min_rt = df['Retention time'].min()

diff --git a/alphabase/psm_reader/sage_reader.py b/alphabase/psm_reader/sage_reader.py
@@ -538,17 +538,31 @@ def _get_annotated_mod_df() -> pd.DataFrame:
     ]
 
 
-def _sage_spec_idx_from_scan_nr(scan_nr: str) -> int:
+def _sage_spec_idx_from_scan_nr(scan_indicator_str: str) -> int:
     """Extract the spectrum index from the scan_nr field in Sage output.
+    Sage uses 1-based indexing for spectra, so we need to subtract 1 to convert to 0-based indexing.
 
     Parameters
     ----------
 
-    scan_nr : str
-        The scan_nr field in Sage output.
+    scan_indicator_str : str
+        The scan_indicator_str field in Sage output.
+        e.g. `'controllerType=0 controllerNumber=1 scan=7846'`
+
+    Returns
+    -------
+
+    int
+        The 0-based spectrum index.
+
+    Examples
+    --------
+
+    >>> _sage_spec_idx_from_scan_nr('controllerType=0 controllerNumber=1 scan=7846')
+    7845
 
     """
-    return int(scan_nr.split("=")[-1])
+    return int(re.search(r"scan=(\d+)", scan_indicator_str).group(1)) - 1
 
 
 class SageReaderBase(PSMReaderBase):