Skip to content

Commit

Permalink
Merge pull request #1 from lehtiolab/dsl2-and-refine-update
Browse files Browse the repository at this point in the history
Dsl2 and refine update
  • Loading branch information
glormph authored May 29, 2024
2 parents 825bb73 + 72d9efb commit fe919d2
Show file tree
Hide file tree
Showing 14 changed files with 525 additions and 165 deletions.
32 changes: 32 additions & 0 deletions .github/workflows/release_container.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Builds and pushes the pipeline container whenever a GitHub release is published.
# The image tag is taken from `manifest.version` in the Nextflow config.
name: release-container
run-name: Release pushed, build container
on:
  release:
    types: [published]

jobs:
  build-container:
    runs-on: ubuntu-latest
    steps:
      - name: Expose GitHub Runtime
        uses: crazy-max/ghaction-github-runtime@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Checkout this repo
        uses: actions/checkout@v4

      - name: Setup nextflow
        uses: nf-core/setup-nextflow@v1

      - name: Get version
        id: get-version
        # A plain shell variable dies with this step's shell. Step outputs must be
        # written as `name=value` to the file named by $GITHUB_OUTPUT, otherwise
        # `steps.get-version.outputs.wf_version` below expands to an empty string
        # and the image is tagged `lehtiolab/nfhelaqc:`.
        run: echo "wf_version=$(nextflow config -value manifest.version)" >> "$GITHUB_OUTPUT"

      - name: Build container
        uses: docker/build-push-action@v5
        with:
          context: .
          # NOTE(review): pushing requires registry credentials; no login step is
          # visible in this workflow -- confirm how authentication is provided.
          push: true
          tags: lehtiolab/nfhelaqc:${{ steps.get-version.outputs.wf_version }}
28 changes: 28 additions & 0 deletions .github/workflows/run_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Runs the repository's integration test script for every pull request
# targeting master and for every push to master.
name: integration-tests
run-name: Run integration tests
on:
  pull_request:
    branches: [master]
  push:
    branches: [master]

jobs:
  integration-tests:
    runs-on: ubuntu-latest
    steps:
      - name: Expose GitHub Runtime
        uses: crazy-max/ghaction-github-runtime@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Setup nextflow
        uses: nf-core/setup-nextflow@v1

      - name: Checkout this repo
        uses: actions/checkout@v4

      # run_tests.sh lives in the repository root, hence the checkout above.
      - name: Run tests
        run: bash run_tests.sh
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.*.sw[poq]
18 changes: 11 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
FROM nfcore/base
LABEL description="Docker image containing all requirements for lehtiolab/helaqc pipeline"
FROM mambaorg/micromamba:1.5.8-bookworm
# This only installs dinosaur
LABEL description="Additional stuff and dinosaur which does not work in biocontainer due to lack of fontconfig"

COPY environment.yml /
ARG NEW_MAMBA_USER=mambauser
ARG NEW_MAMBA_USER_ID=1
ARG NEW_MAMBA_USER_GID=1
USER root

RUN conda env create -f /environment.yml && conda clean -a
ENV PATH /opt/conda/envs/helaqc-2.2/bin:$PATH
# to have envsubst, ps
RUN apt update && apt install -y gettext-base procps

# For dinosaur
RUN apt update && apt install -y fontconfig && apt clean -y
# for dinosaur
RUN micromamba install -y -n base -c conda-forge -c bioconda dinosaur=1.2.0
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Internal QC and mzML precursor refining pipeline

## Usage
Mainly internal, but if you must (`--instrument` takes exactly one of `timstof` or `qe`):

```
nextflow run -resume -profile docker qc.nf \
--db your-peptides.fa \
--instrument qe \
--raw your-QC-run.raw
```

When testing, instead of `--raw`, we use `--mzml` to keep files small.


## Development

For local work:
```
bash run_tests.sh
```

On github actions we run on PR/push to master.


## Releasing
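Create a PR with your changes and update the version (`manifest.version` in `nextflow.config`). After merging, publish a GitHub release: that triggers the workflow which builds the container and tags it with that version.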


## Todo:
- Block PRs where there is no version change
- Fix local testing script
46 changes: 46 additions & 0 deletions assets/sage.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"database": {
"bucket_size": 8192,
"enzyme": {
"missed_cleavages": 2,
"min_len": 7,
"max_len": 50,
"cleave_at": "KR",
"restrict": "P",
"c_terminal": true
},
"fragment_min_mz": 200.0,
"fragment_max_mz": 2000.0,
"peptide_min_mass": 500.0,
"peptide_max_mass": 5000.0,
"ion_kinds": ["a", "b", "y"],
"min_ion_index": 2,
"static_mods": {
"C": 57.0215
},
"variable_mods": {
"M": [15.9949]
},
"max_variable_mods": 2,
"decoy_tag": "decoy_",
"generate_decoys": false
},
"precursor_tol": {
"ppm": [-${PRECTOL}, ${PRECTOL}]
},
"fragment_tol": {
"ppm": [-${FRAGTOL}, ${FRAGTOL}]
},
"precursor_charge": [2, 6],
"isotope_errors": [ -1, 2 ],
"deisotope": false,
"chimera": false,
"wide_window": false,
"predict_rt": false,
"min_peaks": 7,
"max_peaks": 150,
"min_matched_peaks": 3,
"max_fragment_charge": 2,
"report_psms": 1,
"output_directory": "./"
}
12 changes: 6 additions & 6 deletions bin/parse_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,17 @@ def parse_wc_output(wc_out):

with open('tpsms') as fp:
header = next(fp).strip('\n').split('\t')
perrorix = header.index('PrecursorError(ppm)')
perrorix = header.index('precursor_ppm')
calc_ms1data = True
try:
fwhmix = header.index('FWHM')
except ValueError:
print('No FWHM in PSM table, probably --noms1 is specified')
calc_ms1data = False
msgfix = header.index('MSGFScore')
rtix = header.index('Retention time(min)')
misclix = header.index('missed_cleavage')
ionmobix = header.index('Ion mobility(Vs/cm2)')
msgfix = header.index('sage_discriminant_score')
rtix = header.index('rt')
misclix = header.index('missed_cleavages')
ionmobix = header.index('ion_mobility')
use_ionmob = False
qcpsms = []
for line in fp:
Expand All @@ -66,7 +66,7 @@ def parse_wc_output(wc_out):
qcout['precursor_errors'] = calc_boxplot_qs([psm[perrorix] for psm in qcpsms])
if calc_ms1data:
qcout['fwhms'] = calc_boxplot_qs([psm[fwhmix] for psm in qcpsms])
qcout['msgfscores'] = calc_boxplot_qs([psm[msgfix] for psm in qcpsms])
qcout['sagescores'] = calc_boxplot_qs([psm[msgfix] for psm in qcpsms])
qcout['retention_times'] = calc_boxplot_qs([psm[rtix] for psm in qcpsms])
if use_ionmob:
qcout['ionmobilities'] = calc_boxplot_qs([psm[ionmobix] for psm in qcpsms])
Expand Down
30 changes: 7 additions & 23 deletions configuration/base.config
Original file line number Diff line number Diff line change
@@ -1,25 +1,9 @@
params {
mods = "${baseDir}/data/labelfreemods.txt"
}

process {
cpus = 1
// Docker options only, withName doesnt work on docker scope and we need different options
// for the msconvert process, so set it through process scope instead of docker.runOptions
// Not sure how this would affect singularity containers
containerOptions = '-u $(id -u):$(id -g)'
withName: msgfPlus {
cpus = 4
}
withName: dinosaur {
cpus = 4
}
withName: msconvert {
containerOptions = '--rm'
container = 'chambm/pwiz-skyline-i-agree-to-the-vendor-licenses:3.0.20066-729ef9c41'
}
withName: mzRefine {
containerOptions = '--rm'
container = 'chambm/pwiz-skyline-i-agree-to-the-vendor-licenses:3.0.20066-729ef9c41'
}
cpus = { check_max( 1 * task.attempt, 'cpus' ) }
memory = { check_max( 16.GB * task.attempt, 'memory' ) }
time = { check_max( 2.h * task.attempt, 'time' ) }

errorStrategy = { task.exitStatus in [140,137] ? 'retry' : 'finish' }
maxRetries = 1
maxErrors = '-1'
}
10 changes: 0 additions & 10 deletions environment.yml

This file was deleted.

133 changes: 120 additions & 13 deletions nextflow.config
Original file line number Diff line number Diff line change
@@ -1,28 +1,135 @@
process.container = 'lehtiolab/helaqc:2.2'
// Default values for the pipeline parameters.
params {
// Ceilings that check_max() (defined further down in this file) compares
// requested memory / cpus / time against.
max_memory = 128.GB
max_cpus = 16
max_time = 240.h

// Switched to true by the `test` profile.
test = false

// both refine and qc:
// `db` is overridden by the `lehtio` profile.
db = false
instrument = false
outdir = 'results'

// QC
raw = false
mzml = false
mods = "${baseDir}/data/labelfreemods.txt"
noquant = false
overbook_cpus_factor = 1
filters = false
options = false
psmconf = 0.01
pepconf = 0.01

// NOTE(review): presumably these feed the ${PRECTOL} / ${FRAGTOL} placeholders
// in assets/sage.json when set -- confirm against the workflow script.
prectol = false
fragtol = false

// refine:
isobaric = 'lf'
}


nextflow.enable.configProcessNamesValidation = false
includeConfig 'configuration/base.config'

external_config_version = 'main'


profiles {

standard {
includeConfig 'configuration/base.config'
}
qc {
includeConfig 'configuration/base.config'
process {
clusterOptions = '--qos=qc'
}
params {
threadspercore = 2
}
process.clusterOptions = '--qos=qc'
params.threadspercore = 2
includeConfig "https://raw.githubusercontent.com/lehtiolab/static-resources/${external_config_version}/nf-configs/qc.config"
}

docker {
docker {
enabled = true
fixOwnership = true
}
}

test {
docker {
enabled = true
fixOwnership = true
}
params.test = true
process.memory = 15.GB
}

lehtio {
includeConfig 'configuration/base.config'
includeConfig 'configuration/lehtio.config'
includeConfig "https://raw.githubusercontent.com/lehtiolab/static-resources/${external_config_version}/nf-configs/lehtio.config"
trace {
file = './trace.txt'
overwrite = true
}
params {
db = "${data_file_path}/ENS111_Homo_sapiens.fa"
}
}
}


// Per-process overrides, matched by process name.
process {
// CPU count for the sage process.
withName: sage {
cpus = 4
}

// CPU count for the dinosaur process.
withName: dinosaur {
cpus = 2
}

// msconvert and mzRefine both run in the same pinned pwiz/skyline image,
// with '--rm' as their container options.
withName: msconvert {
containerOptions = '--rm'
container = 'chambm/pwiz-skyline-i-agree-to-the-vendor-licenses:3.0.20066-729ef9c41'
}

withName: mzRefine {
containerOptions = '--rm'
container = 'chambm/pwiz-skyline-i-agree-to-the-vendor-licenses:3.0.20066-729ef9c41'
}
}

// Pipeline metadata.
manifest {
name = 'lehtiolab/nf-mslab'
author = 'Jorrit Boekel'
homePage = 'https://github.com/lehtiolab/nf-mslab'
description = 'HeLa QC and refine precursor in mzML pipelines'
// Nextflow version requirement for running this pipeline.
nextflowVersion = '==24.04.1'
// Read by the release workflow (`nextflow config -value manifest.version`)
// to build the container image tag.
version = '3.0'
}

/**
 * Cap a requested resource at the pipeline-wide ceiling from `params`.
 *
 * @param obj   the requested amount: something castable to
 *              nextflow.util.MemoryUnit for 'memory', castable to
 *              nextflow.util.Duration for 'time', or a number for 'cpus'
 * @param type  one of 'memory', 'time' or 'cpus'
 * @return      params.max_memory / params.max_time / the smaller of obj and
 *              params.max_cpus when the request exceeds the ceiling; otherwise
 *              obj unchanged. If the ceiling itself cannot be used, an error
 *              line is printed and obj is returned as-is. No branch handles
 *              any other `type`, so nothing is explicitly returned for it.
 */
def check_max(obj, type) {
    if (type == 'memory') {
        // Declared with `def` so this is a true local. A bare `test = ...`
        // assignment is resolved against the enclosing config script instead,
        // where `test` is also the name of a param and a profile.
        def requested = obj as nextflow.util.MemoryUnit
        try {
            if (requested.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
                return params.max_memory as nextflow.util.MemoryUnit
            else
                return obj
        } catch (all) {
            println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'time') {
        // Local for the same reason as in the memory branch.
        def requested = obj as nextflow.util.Duration
        try {
            if (requested.compareTo(params.max_time as nextflow.util.Duration) == 1)
                return params.max_time as nextflow.util.Duration
            else
                return obj
        } catch (all) {
            println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'cpus') {
        try {
            return Math.min( obj, params.max_cpus as int )
        } catch (all) {
            println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
            return obj
        }
    }
}
Loading

0 comments on commit fe919d2

Please sign in to comment.