From 7fcc8892975ab954eb74cd3adef8cc10ab8cc52e Mon Sep 17 00:00:00 2001 From: Manuel Date: Wed, 24 Jul 2019 17:09:39 +0200 Subject: [PATCH 1/9] Add deconvolution functionality - add ability to deconvolute spectra using ms_deisotope (https://github.com/mobiusklein/ms_deisotope) - add first test - add ms_deisotop in requirements.txt --- pymzml/spec.py | 21 +++++++++++++++++++++ requirements.txt | 1 + tests/ms2_spec_test.py | 18 +++++++++++++++++- 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/pymzml/spec.py b/pymzml/spec.py index f5ea506f..e4c67695 100755 --- a/pymzml/spec.py +++ b/pymzml/spec.py @@ -1,3 +1,4 @@ +import time #!usr/bin/env python3 # -*- coding: latin-1 -*- """ @@ -1011,6 +1012,22 @@ def peaks(self, peak_type): peaks.sort(key=itemgetter(0)) return peaks + def _deconvolute_peaks(self, *args, **kwargs): + start = time.time() + from ms_deisotope.deconvolution import deconvolute_peaks + from ms_peak_picker import simple_peak + peaks = self.peaks("centroided") + # pack peak matrix into expected structure + peaks = [simple_peak(p[0], p[1], 0.01) for p in peaks] + decon_result = deconvolute_peaks(peaks, *args, **kwargs) + dpeaks = decon_result.peak_set + # pack deconvoluted peak list into matrix structure + dpeaks_mat = np.zeros((len(dpeaks), 3), dtype=float) + for i, dp in enumerate(dpeaks): + dpeaks_mat[i, :] = dp.neutral_mass, dp.intensity, dp.charge + print(f'took {time.time() - start} seconds') + return dpeaks_mat + def set_peaks(self, peaks, peak_type): """ Assign a custom peak array of type peak_type @@ -1034,6 +1051,10 @@ def set_peaks(self, peaks, peak_type): self._peak_dict['reprofiled'] = dict(peaks) except TypeError: self._peak_dict['reprofiled'] = None + elif peak_type == 'deconvoluted': + self._peak_dict['deconvoluted'] = peaks + self._mz = self.peaks('raw')[:,0] + self._i = self.peaks('raw') else: raise Exception( 'Peak type is not suppported\n' diff --git a/requirements.txt b/requirements.txt index 081c5529..d8846fc1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ numpy >= 1.8.0 plotly < 2.0 pynumpress >= 0.0.4 regex +git+https://github.com/mobiusklein/ms_deisotope.git # Testing and dev cython diff --git a/tests/ms2_spec_test.py b/tests/ms2_spec_test.py index 0c7e81c5..f3a27765 100755 --- a/tests/ms2_spec_test.py +++ b/tests/ms2_spec_test.py @@ -1,10 +1,11 @@ import sys import os sys.path.append(os.path.abspath('.')) +from pymzml.spec import PROTON import pymzml.run as run import unittest import test_file_paths - +import numpy as np class SpectrumMS2Test(unittest.TestCase): """ @@ -49,5 +50,20 @@ def test_select_precursors(self): }] ) + def test_deconvolute_peaks(self): + charge = 3 + test_mz = 430.313 + arr = np.array([(test_mz, 100), (test_mz + PROTON/charge, 49)]) + spec = self.Run[2548] + print(spec.ms_level) + spec.set_peaks(arr, 'centroided') + decon = spec.peaks('deconvoluted') + self.assertEqual(len(decon), 1) + print(decon) + decon_mz = (test_mz * charge) - charge * PROTON + self.assertEqual(decon[0][0], decon_mz) + self.assertEqual(decon[0][1], 149) # 149 since itensities are 100 and 49 + self.assertEqual(decon[0][2], 3) + if __name__ == '__main__': unittest.main(verbosity=3) \ No newline at end of file From 0c10ec561f85a8ac4e9b9c93b3cf72461d7d42fa Mon Sep 17 00:00:00 2001 From: Manuel Date: Wed, 24 Jul 2019 17:51:29 +0200 Subject: [PATCH 2/9] remove print function --- pymzml/spec.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pymzml/spec.py b/pymzml/spec.py index e4c67695..6f505698 100755 --- a/pymzml/spec.py +++ b/pymzml/spec.py @@ -1025,7 +1025,6 @@ def _deconvolute_peaks(self, *args, **kwargs): dpeaks_mat = np.zeros((len(dpeaks), 3), dtype=float) for i, dp in enumerate(dpeaks): dpeaks_mat[i, :] = dp.neutral_mass, dp.intensity, dp.charge - print(f'took {time.time() - start} seconds') return dpeaks_mat def set_peaks(self, peaks, peak_type): From fb1ad3d99c64fbb0b6873bb1a86117d4ad5fb5d9 Mon Sep 17 00:00:00 2001 From: Manuel Date: Thu, 25 Jul 2019 10:51:56 +0200 Subject: [PATCH 3/9] Remove unused time import --- pymzml/spec.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pymzml/spec.py b/pymzml/spec.py index 6f505698..3ed6b0c6 100755 --- a/pymzml/spec.py +++ b/pymzml/spec.py @@ -1,4 +1,3 @@ -import time #!usr/bin/env python3 # -*- coding: latin-1 -*- """ @@ -50,6 +49,8 @@ from struct import unpack import numpy as np +from ms_deisotope.deconvolution import deconvolute_peaks +from ms_peak_picker import simple_peak from . import regex_patterns from .decoder import MSDecoder @@ -1013,9 +1014,6 @@ def peaks(self, peak_type): return peaks def _deconvolute_peaks(self, *args, **kwargs): - start = time.time() - from ms_deisotope.deconvolution import deconvolute_peaks - from ms_peak_picker import simple_peak peaks = self.peaks("centroided") # pack peak matrix into expected structure peaks = [simple_peak(p[0], p[1], 0.01) for p in peaks] From 33da5f6819ea315257d53d6d703b37e069ed25b3 Mon Sep 17 00:00:00 2001 From: Manuel Date: Fri, 26 Jul 2019 17:31:55 +0200 Subject: [PATCH 4/9] Update requirements.txt - add commit hash to ms_deisotope dependency --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d8846fc1..000d56ae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ numpy >= 1.8.0 plotly < 2.0 pynumpress >= 0.0.4 regex -git+https://github.com/mobiusklein/ms_deisotope.git +git+https://github.com/mobiusklein/ms_deisotope.git#90263474a0d0afcd3f4b79b732841599a803455e # Testing and dev cython From db18cb559850c6542dffd03f38a250610d2e1d85 Mon Sep 17 00:00:00 2001 From: Manuel Date: Mon, 29 Jul 2019 15:40:55 +0200 Subject: [PATCH 5/9] fixed small bug when setting deconvoluted peaks --- pymzml/spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymzml/spec.py b/pymzml/spec.py index 3ed6b0c6..34ec4357 100755 --- a/pymzml/spec.py +++ b/pymzml/spec.py @@ -1051,7 +1051,7 @@ def set_peaks(self, peaks, peak_type): elif peak_type == 'deconvoluted': self._peak_dict['deconvoluted'] = peaks self._mz = self.peaks('raw')[:,0] - self._i = self.peaks('raw') + self._i = self.peaks('raw')[:,1] else: raise Exception( 'Peak type is not suppported\n' From 90a770f9bb155cbfde264f61af3b929ebe649811 Mon Sep 17 00:00:00 2001 From: Manuel Date: Mon, 29 Jul 2019 16:59:09 +0200 Subject: [PATCH 6/9] remove print functions --- tests/main_spec_test.py | 4 ---- tests/ms2_spec_test.py | 2 -- 2 files changed, 6 deletions(-) diff --git a/tests/main_spec_test.py b/tests/main_spec_test.py index cab9bf78..09eecf87 100755 --- a/tests/main_spec_test.py +++ b/tests/main_spec_test.py @@ -103,9 +103,7 @@ def test_centroid_peaks(self): """ """ self.spec.set_peaks([(1, 10), (2, 20), (3, 30), (4, 40)], "centroided") - print(self.spec._peak_dict) c_peaks = self.spec.peaks("centroided") - print(c_peaks[:4]) self.assertPeaksIdentical(c_peaks, [(1, 10), (2, 20), (3, 30), (4, 40)]) def test_centroid_peaks_for_real(self): @@ -134,10 +132,8 @@ def test_add(self): """ spec = self.Run[6] p1 = spec.peaks("reprofiled") - print(p1[:5]) spec += spec p2 = spec.peaks("reprofiled") - print(p2[:5]) self.assertPeaksIdentical( p1, p2, mult=2 ) # , msg='List 1 : {0}\nList 2:{1}'.format(p1, p2)) diff --git a/tests/ms2_spec_test.py b/tests/ms2_spec_test.py index 728a6ee8..3e9037f4 100755 --- a/tests/ms2_spec_test.py +++ b/tests/ms2_spec_test.py @@ -51,11 +51,9 @@ def test_deconvolute_peaks(self): test_mz = 430.313 arr = np.array([(test_mz, 100), (test_mz + PROTON/charge, 49)]) spec = self.Run[2548] - print(spec.ms_level) spec.set_peaks(arr, 'centroided') decon = spec.peaks('deconvoluted') self.assertEqual(len(decon), 1) - print(decon) decon_mz = (test_mz * charge) - charge * PROTON self.assertEqual(decon[0][0], decon_mz) self.assertEqual(decon[0][1], 149) # 149 since itensities are 100 and 49 From d0ded95829ca60d8a30a00f834167f093c4938d0 Mon Sep 17 00:00:00 2001 From: Manuel Date: Mon, 29 Jul 2019 17:07:47 +0200 Subject: [PATCH 7/9] black --- pymzml/plot.py | 6 +++--- pymzml/spec.py | 10 +++++----- tests/ms2_spec_test.py | 12 +++++++----- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/pymzml/plot.py b/pymzml/plot.py index d88b1589..82db0977 100755 --- a/pymzml/plot.py +++ b/pymzml/plot.py @@ -460,9 +460,9 @@ def add( x_values.append(x[0]) y_values.append(x[1]) - elif style[0] == 'points': - mode = 'markers' - shape = 'linear' + elif style[0] == "points": + mode = "markers" + shape = "linear" x_values = x_vals y_values = y_vals diff --git a/pymzml/spec.py b/pymzml/spec.py index 8b251cdd..8c96cc76 100755 --- a/pymzml/spec.py +++ b/pymzml/spec.py @@ -1049,11 +1049,11 @@ def set_peaks(self, peaks, peak_type): try: self._peak_dict["reprofiled"] = dict(peaks) except TypeError: - self._peak_dict['reprofiled'] = None - elif peak_type == 'deconvoluted': - self._peak_dict['deconvoluted'] = peaks - self._mz = self.peaks('raw')[:,0] - self._i = self.peaks('raw')[:,1] + self._peak_dict["reprofiled"] = None + elif peak_type == "deconvoluted": + self._peak_dict["deconvoluted"] = peaks + self._mz = self.peaks("raw")[:, 0] + self._i = self.peaks("raw")[:, 1] else: raise Exception( "Peak type is not suppported\n" diff --git a/tests/ms2_spec_test.py b/tests/ms2_spec_test.py index 3e9037f4..1964bd48 100755 --- a/tests/ms2_spec_test.py +++ b/tests/ms2_spec_test.py @@ -1,7 +1,7 @@ import sys import os -sys.path.append(os.path.abspath('.')) +sys.path.append(os.path.abspath(".")) from pymzml.spec import PROTON import pymzml.run as run @@ -9,6 +9,7 @@ import test_file_paths import numpy as np + class SpectrumMS2Test(unittest.TestCase): """ BSA test file @@ -49,15 +50,16 @@ def test_select_precursors(self): def test_deconvolute_peaks(self): charge = 3 test_mz = 430.313 - arr = np.array([(test_mz, 100), (test_mz + PROTON/charge, 49)]) + arr = np.array([(test_mz, 100), (test_mz + PROTON / charge, 49)]) spec = self.Run[2548] - spec.set_peaks(arr, 'centroided') - decon = spec.peaks('deconvoluted') + spec.set_peaks(arr, "centroided") + decon = spec.peaks("deconvoluted") self.assertEqual(len(decon), 1) decon_mz = (test_mz * charge) - charge * PROTON self.assertEqual(decon[0][0], decon_mz) - self.assertEqual(decon[0][1], 149) # 149 since itensities are 100 and 49 + self.assertEqual(decon[0][1], 149) # 149 since itensities are 100 and 49 self.assertEqual(decon[0][2], 3) + if __name__ == "__main__": unittest.main(verbosity=3) From bb1076ecf23f001eeed0f773fab6be49e9c28e1f Mon Sep 17 00:00:00 2001 From: Manuel Date: Tue, 30 Jul 2019 10:25:53 +0200 Subject: [PATCH 8/9] Make ms_deisotope optional dependency - add ms_deisotope in setup.py - make pymzML check if ms_deisotope is installed --- pymzml/plot.py | 6 +++--- pymzml/spec.py | 44 +++++++++++++++++++++++++----------------- setup.py | 4 +++- tests/ms2_spec_test.py | 12 +++++++----- 4 files changed, 39 insertions(+), 27 deletions(-) diff --git a/pymzml/plot.py b/pymzml/plot.py index d88b1589..82db0977 100755 --- a/pymzml/plot.py +++ b/pymzml/plot.py @@ -460,9 +460,9 @@ def add( x_values.append(x[0]) y_values.append(x[1]) - elif style[0] == 'points': - mode = 'markers' - shape = 'linear' + elif style[0] == "points": + mode = "markers" + shape = "linear" x_values = x_vals y_values = y_vals diff --git a/pymzml/spec.py b/pymzml/spec.py index 8b251cdd..7c89bdc2 100755 --- a/pymzml/spec.py +++ b/pymzml/spec.py @@ -49,9 +49,14 @@ from struct import unpack import numpy as np -from ms_deisotope.deconvolution import deconvolute_peaks -from ms_peak_picker import simple_peak - +try: + DECON_DEP = True + from ms_deisotope.deconvolution import deconvolute_peaks + from ms_peak_picker import simple_peak +except ImportError: + DECON_DEP = True + print('[Warning] ms_deisotope is not installed, spectrum deconvolution is not possible.') + print('To enable deconvolution, please use pip install ms_deisotope.') from . import regex_patterns from .decoder import MSDecoder @@ -1016,16 +1021,19 @@ def peaks(self, peak_type): return peaks def _deconvolute_peaks(self, *args, **kwargs): - peaks = self.peaks("centroided") - # pack peak matrix into expected structure - peaks = [simple_peak(p[0], p[1], 0.01) for p in peaks] - decon_result = deconvolute_peaks(peaks, *args, **kwargs) - dpeaks = decon_result.peak_set - # pack deconvoluted peak list into matrix structure - dpeaks_mat = np.zeros((len(dpeaks), 3), dtype=float) - for i, dp in enumerate(dpeaks): - dpeaks_mat[i, :] = dp.neutral_mass, dp.intensity, dp.charge - return dpeaks_mat + if DECON_DEP: + peaks = self.peaks("centroided") + # pack peak matrix into expected structure + peaks = [simple_peak(p[0], p[1], 0.01) for p in peaks] + decon_result = deconvolute_peaks(peaks, *args, **kwargs) + dpeaks = decon_result.peak_set + # pack deconvoluted peak list into matrix structure + dpeaks_mat = np.zeros((len(dpeaks), 3), dtype=float) + for i, dp in enumerate(dpeaks): + dpeaks_mat[i, :] = dp.neutral_mass, dp.intensity, dp.charge + return dpeaks_mat + else: + print('ms_deisotope is missing, please install using pip install ms_deisotope') def set_peaks(self, peaks, peak_type): """ @@ -1049,11 +1057,11 @@ def set_peaks(self, peaks, peak_type): try: self._peak_dict["reprofiled"] = dict(peaks) except TypeError: - self._peak_dict['reprofiled'] = None - elif peak_type == 'deconvoluted': - self._peak_dict['deconvoluted'] = peaks - self._mz = self.peaks('raw')[:,0] - self._i = self.peaks('raw')[:,1] + self._peak_dict["reprofiled"] = None + elif peak_type == "deconvoluted": + self._peak_dict["deconvoluted"] = peaks + self._mz = self.peaks("raw")[:, 0] + self._i = self.peaks("raw")[:, 1] else: raise Exception( "Peak type is not suppported\n" diff --git a/setup.py b/setup.py index 4e4505d9..cc8fd831 100755 --- a/setup.py +++ b/setup.py @@ -24,15 +24,17 @@ python_requires = '>=3.4.0', install_requires = [ 'numpy >= 1.8.0', - 'regex' + 'regex', ], extras_require = { 'full': [ 'plotly < 2.0', 'pynumpress>=0.0.4', + 'ms_deisotope', ], 'plot': ['plotly < 2.0'], 'pynumpress': ['pynumpress>=0.0.4'], + 'deconvolution': ['ms_deisotope'] }, description = 'high-throughput mzML parsing', long_description = 'pymzML - python module for mzML parsing', diff --git a/tests/ms2_spec_test.py b/tests/ms2_spec_test.py index 3e9037f4..1964bd48 100755 --- a/tests/ms2_spec_test.py +++ b/tests/ms2_spec_test.py @@ -1,7 +1,7 @@ import sys import os -sys.path.append(os.path.abspath('.')) +sys.path.append(os.path.abspath(".")) from pymzml.spec import PROTON import pymzml.run as run @@ -9,6 +9,7 @@ import test_file_paths import numpy as np + class SpectrumMS2Test(unittest.TestCase): """ BSA test file @@ -49,15 +50,16 @@ def test_select_precursors(self): def test_deconvolute_peaks(self): charge = 3 test_mz = 430.313 - arr = np.array([(test_mz, 100), (test_mz + PROTON/charge, 49)]) + arr = np.array([(test_mz, 100), (test_mz + PROTON / charge, 49)]) spec = self.Run[2548] - spec.set_peaks(arr, 'centroided') - decon = spec.peaks('deconvoluted') + spec.set_peaks(arr, "centroided") + decon = spec.peaks("deconvoluted") self.assertEqual(len(decon), 1) decon_mz = (test_mz * charge) - charge * PROTON self.assertEqual(decon[0][0], decon_mz) - self.assertEqual(decon[0][1], 149) # 149 since itensities are 100 and 49 + self.assertEqual(decon[0][1], 149) # 149 since itensities are 100 and 49 self.assertEqual(decon[0][2], 3) + if __name__ == "__main__": unittest.main(verbosity=3) From c633c0e72982cec50339a105d4706245c9b2d741 Mon Sep 17 00:00:00 2001 From: Manuel Date: Mon, 5 Aug 2019 10:29:29 +0200 Subject: [PATCH 9/9] update requirements file to use pypi version of ms_deisotope --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 360c36fc..3dbff1fa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,5 +3,5 @@ numpy plotly pynumpress >= 0.0.4 regex -git+https://github.com/mobiusklein/ms_deisotope.git#90263474a0d0afcd3f4b79b732841599a803455e +ms_deisotope == 0.0.9 tox