From aba5750d55c0ac495ab0e7e1b3af8ca33850346f Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Thu, 23 Nov 2023 00:45:41 +0100 Subject: [PATCH 1/3] FEAT using mp for file loading under linux --- alpharaw/thermo.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/alpharaw/thermo.py b/alpharaw/thermo.py index d0c98f0..a09fc75 100644 --- a/alpharaw/thermo.py +++ b/alpharaw/thermo.py @@ -118,7 +118,7 @@ class ThermoRawData(MSData_Base): def __init__(self, centroided : bool = True, process_count : int = 10, - mp_batch_size : int = 10000, + mp_batch_size : int = 5000, **kwargs): """ Parameters @@ -148,17 +148,15 @@ def _import(self, first_spectrum_number = rawfile.FirstSpectrumNumber last_spectrum_number = rawfile.LastSpectrumNumber - if platform.system() != 'Linux': - batches = np.arange(first_spectrum_number, last_spectrum_number+1, self.mp_batch_size) - batches = np.append(batches, last_spectrum_number+1) + mode = 'spawn' if platform.system() != 'Linux' else 'forkserver' + + batches = np.arange(first_spectrum_number, last_spectrum_number+1, self.mp_batch_size) + batches = np.append(batches, last_spectrum_number+1) - # use multiprocessing to load batches - _import_batch_partial = partial(_import_batch, raw_file_path, self.centroided) - with mp.get_context("spawn").Pool(processes = self.process_count) as pool: - batches = list(tqdm(pool.imap(_import_batch_partial, zip(batches[:-1], batches[1:])))) - - else: - batches = [_import_batch(raw_file_path, self.centroided, (first_spectrum_number, last_spectrum_number+1))] + # use multiprocessing to load batches + _import_batch_partial = partial(_import_batch, raw_file_path, self.centroided) + with mp.get_context(mode).Pool(processes = self.process_count) as pool: + batches = list(tqdm(pool.imap(_import_batch_partial, zip(batches[:-1], batches[1:])))) # collect peak indices _peak_indices = np.concatenate([batch['_peak_indices'] for batch in batches]) From 
8e39a1baefa4b55d8436e3e897caccf307998d43 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Thu, 23 Nov 2023 01:00:34 +0100 Subject: [PATCH 2/3] FIX disable warnings --- alpharaw/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/alpharaw/__init__.py b/alpharaw/__init__.py index 29b5952..8b6b502 100644 --- a/alpharaw/__init__.py +++ b/alpharaw/__init__.py @@ -1,5 +1,8 @@ #!python +import warnings +warnings.filterwarnings("ignore") + def register_readers(): from .ms_data_base import ms_reader_provider from .legacy_msdata import mgf From 930c3fa9f30d62ec32f6f58ca2e05ae1826f2986 Mon Sep 17 00:00:00 2001 From: GeorgWa Date: Thu, 23 Nov 2023 01:20:18 +0100 Subject: [PATCH 3/3] FIX copy memory explicitly --- alpharaw/thermo.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/alpharaw/thermo.py b/alpharaw/thermo.py index a09fc75..edb0861 100644 --- a/alpharaw/thermo.py +++ b/alpharaw/thermo.py @@ -98,18 +98,21 @@ def _import_batch( isolation_mz_uppers.append(isolation_center + isolation_width / 2) precursor_charges.append(charge) rawfile.Close() + + # copies of numpy arrays are needed to move them explicitly to cpython heap + # otherwise mono might interfere later return { '_peak_indices': _peak_indices, - 'peak_mz': np.concatenate(mz_values), - 'peak_intensity': np.concatenate(intensity_values), - 'rt': np.array(rt_values), - 'precursor_mz': np.array(precursor_mz_values), - 'precursor_charge': np.array(precursor_charges, dtype=np.int8), - 'isolation_lower_mz': np.array(isolation_mz_lowers), - 'isolation_upper_mz': np.array(isolation_mz_uppers), - 'ms_level': np.array(ms_order_list, dtype=np.int8), - 'nce': np.array(ce_list, dtype=np.float32), - 'injection_time': np.array(injection_time_list, dtype=np.float32) + 'peak_mz': np.concatenate(mz_values).copy(), + 'peak_intensity': np.concatenate(intensity_values).copy(), + 'rt': np.array(rt_values).copy(), + 'precursor_mz': np.array(precursor_mz_values).copy(), + 
'precursor_charge': np.array(precursor_charges, dtype=np.int8).copy(), + 'isolation_lower_mz': np.array(isolation_mz_lowers).copy(), + 'isolation_upper_mz': np.array(isolation_mz_uppers).copy(), + 'ms_level': np.array(ms_order_list, dtype=np.int8).copy(), + 'nce': np.array(ce_list, dtype=np.float32).copy(), + 'injection_time': np.array(injection_time_list, dtype=np.float32).copy() } class ThermoRawData(MSData_Base): """