Skip to content

Commit

Permalink
Merge pull request #53 from MannLabs/development
Browse files Browse the repository at this point in the history
preparation for v1.4.0
  • Loading branch information
GeorgWa authored Nov 11, 2023
2 parents aae406f + 7577fde commit 0c6acdc
Show file tree
Hide file tree
Showing 55 changed files with 4,238 additions and 521 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/pip_installation.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
on:
push:
branches: [ main, development ]
pull_request:
branches: [ main, development ]
workflow_dispatch:
Expand Down
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ dmypy.json
# Pyre type checker
.pyre/

# Data
testdata/

######################
# OS generated files #
######################
Expand All @@ -156,4 +159,4 @@ gui/out/
###################
# mono crash logs #
###################
mono_crash*
mono_crash*
4 changes: 2 additions & 2 deletions alphadia/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


__project__ = "alphadia"
__version__ = "1.3.2"
__version__ = "1.4.0"
__license__ = "Apache"
__description__ = "An open-source Python package of the AlphaPept ecosystem"
__author__ = "Mann Labs"
Expand All @@ -13,7 +13,7 @@
"software",
"AlphaPept ecosystem",
]
__python_version__ = ">=3.8,<3.10"
__python_version__ = ">=3.8,<=3.12"
__classifiers__ = [
"Development Status :: 1 - Planning",
# "Development Status :: 2 - Pre-Alpha",
Expand Down
90 changes: 62 additions & 28 deletions alphadia/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
import logging
import time
import yaml
import os

# alphadia imports
import alphadia
from alphadia.workflow import reporting
from alphadia import utils

# alpha family imports

Expand All @@ -22,9 +24,8 @@
)

@click.pass_context
@click.version_option(alphadia.__version__, "-v", "--version")
@click.version_option(alphadia.__version__, "-v", "--version", message="%(version)s")
def run(ctx, **kwargs):

if ctx.invoked_subcommand is None:
click.echo(run.get_help(ctx))

Expand All @@ -38,7 +39,7 @@ def gui():
help="Extract DIA precursors from a list of raw files using a spectral library."
)
@click.argument(
"output-location",
"output-directory",
type=click.Path(exists=True, file_okay=False, dir_okay=True),
required=False,
)
Expand All @@ -49,12 +50,26 @@ def gui():
multiple=True,
type=click.Path(exists=True, file_okay=True, dir_okay=True),
)
@click.option(
'--directory',
'-d',
help="Directory containing raw data input files.",
type=click.Path(exists=True, file_okay=False, dir_okay=True),
)
@click.option(
'--library',
'-l',
help="Spectral library in AlphaBase hdf5 format.",
type=click.Path(exists=True, file_okay=True, dir_okay=False),
)
@click.option(
'--wsl',
'-w',
help="Run alphadia using WSL. Windows paths will be converted to WSL paths.",
type=bool,
default=False,
is_flag=True,
)
@click.option(
"--fdr",
help='False discovery rate for the final output.',
Expand Down Expand Up @@ -112,38 +127,59 @@ def extract(**kwargs):
with open(kwargs['config'], 'r') as f:
config_update = yaml.safe_load(f)

output_location = None
if kwargs['output_location'] is not None:
output_location = kwargs['output_location']
# update output directory based on config file
output_directory = None
if kwargs['output_directory'] is not None:
if kwargs['wsl']:
kwargs['output_directory'] = utils.windows_to_wsl(kwargs['output_directory'])
output_directory = kwargs['output_directory']

if "output" in config_update:
output_location = config_update['output']
if "output_directory" in config_update:
if kwargs['wsl']:
config_update['output_directory'] = utils.windows_to_wsl(config_update['output_directory'])
output_directory = config_update['output_directory']

if output_location is None:
logging.error("No output location specified.")

if output_directory is None:
logging.error("No output directory specified.")
return

reporting.init_logging(kwargs['output_location'])
reporting.init_logging(output_directory)
logger = logging.getLogger()

# assert input files have been specified
files = None
files = []
if kwargs['file'] is not None:
files = list(kwargs['file'])
if kwargs['wsl']:
files = [utils.windows_to_wsl(f) for f in files]

# load whole directory if specified
if kwargs['directory'] is not None:
if kwargs['wsl']:
kwargs['directory'] = utils.windows_to_wsl(kwargs['directory'])
files += [os.path.join(kwargs['directory'], f) for f in os.listdir(kwargs['directory'])]

if "files" in config_update:
files = config_update['files'] if type(config_update['files']) is list else [config_update['files']]
# load list of raw files from config file
if "raw_file_list" in config_update:
if kwargs['wsl']:
config_update['raw_file_list'] = [utils.windows_to_wsl(f) for f in config_update['raw_file_list']]
files += config_update['raw_file_list'] if type(config_update['raw_file_list']) is list else [config_update['raw_file_list']]

if (files is None) or (len(files) == 0):
logging.error("No files specified.")
logging.error("No raw files specified.")
return

# assert library has been specified
library = None
if kwargs['library'] is not None:
if kwargs['wsl']:
kwargs['library'] = utils.windows_to_wsl(kwargs['library'])
library = kwargs['library']

if "library" in config_update:
if kwargs['wsl']:
config_update['library'] = utils.windows_to_wsl(config_update['library'])
library = config_update['library']

if library is None:
Expand All @@ -154,27 +190,25 @@ def extract(**kwargs):
for f in files:
logger.progress(f" {f}")
logger.progress(f"Using library {library}.")
logger.progress(f"Saving output to {output_location}.")



if kwargs['wsl']:
config_update['general']['wsl'] = True

logger.progress(f"Saving output to {output_directory}.")

try:

import matplotlib
# important to suppress matplotlib output
matplotlib.use('Agg')

from alphabase.spectral_library.base import SpecLibBase
from alphadia.planning import Plan

lib = SpecLibBase()
lib.load_hdf(library, load_mod_seq=True)
#lib._precursor_df['elution_group_idx'] = lib._precursor_df['precursor_idx']

#config_update = eval(kwargs['config_update']) if kwargs['config_update'] else None

plan = Plan(
output_location,
output_directory,
files,
lib,
library,
config_update = config_update
)

Expand All @@ -185,4 +219,4 @@ def extract(**kwargs):
)

except Exception as e:
logging.exception(e)
logger.error(e)
21 changes: 19 additions & 2 deletions alphadia/data/thermo.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# native imports
import math
import os
import logging
logger = logging.getLogger()

# alphadia imports
from alphadia import utils
Expand Down Expand Up @@ -114,6 +116,7 @@ def __init__(
self,
path,
astral_ms1=False,
cv=None,
**kwargs
):
super().__init__(**kwargs)
Expand All @@ -122,6 +125,7 @@ def __init__(
self.sample_name = os.path.basename(self.raw_file_path)

self.astral_ms1 = astral_ms1
self.cv = cv
self.filter_spectra()

self.cycle = calculate_cycle(self.spectrum_df)
Expand All @@ -145,16 +149,29 @@ def __init__(
self.frame_max_index = len(self.rt_values)-1

def filter_spectra(self):
    """Filter ``self.spectrum_df`` in place and renumber the remaining spectra.

    Steps, in order:

    1. ``nce``-based selection, controlled by ``self.astral_ms1``:

       * ``True``: rows with ``nce <= 0.1`` are dropped. Of the remaining
         rows, those with ``nce < 1.1`` are relabelled as MS1
         (``ms_level = 1``) and their ``precursor_mz``,
         ``isolation_lower_mz`` and ``isolation_upper_mz`` are set to
         ``-1.0``.
       * ``False``: only rows with ``nce < 0.1`` or ``nce > 1.1`` are kept.

    2. Optional CV selection: when ``self.cv`` is not ``None`` and the frame
       has a ``cv`` column, only rows whose ``cv`` is within ``0.1`` of
       ``self.cv`` are kept. If the column is missing the step is skipped.

    3. ``spec_idx`` is rewritten as ``0 .. len(self.spectrum_df) - 1``.

    The method reads ``self.astral_ms1``, ``self.cv`` and
    ``self.spectrum_df``; it rebinds ``self.spectrum_df`` and returns
    ``None``.
    """
    nce = self.spectrum_df['nce']

    # filter for astral MS1
    if self.astral_ms1:
        self.spectrum_df = self.spectrum_df[nce > 0.1]

        # The mask is computed once: none of the assignments below touch 'nce'.
        low_nce = self.spectrum_df['nce'] < 1.1
        self.spectrum_df.loc[low_nce, 'ms_level'] = 1
        self.spectrum_df.loc[
            low_nce,
            ['precursor_mz', 'isolation_lower_mz', 'isolation_upper_mz'],
        ] = -1.0
    else:
        self.spectrum_df = self.spectrum_df[(nce < 0.1) | (nce > 1.1)]

    # filter for cv
    if self.cv is not None and 'cv' in self.spectrum_df.columns:
        logger.info(f"Filtering for CV {self.cv}")
        logger.info(f"Before: {len(self.spectrum_df)}")
        # use np.isclose to account for floating point errors
        cv_matches = np.isclose(self.spectrum_df['cv'], self.cv, atol=0.1)
        self.spectrum_df = self.spectrum_df[cv_matches]
        logger.info(f"After: {len(self.spectrum_df)}")

    # Renumber once, after every filter has been applied, so the index is
    # contiguous for the final set of spectra.
    self.spectrum_df['spec_idx'] = np.arange(len(self.spectrum_df))

def jitclass(self):
return ThermoJIT(
Expand Down
Loading

0 comments on commit 0c6acdc

Please sign in to comment.