Skip to content

Commit

Permalink
Merge branch 'master' into fix/numpy-yte-parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
schrins committed Dec 3, 2024
2 parents 9170582 + 7c24fb8 commit 8058ee0
Show file tree
Hide file tree
Showing 15 changed files with 64 additions and 85 deletions.
1 change: 0 additions & 1 deletion .test/config-chm-eval/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ ref:

primers:
trimming:
activate: false
primers_fa1: ""
primers_fa2: ""
library_length: 0
Expand Down
5 changes: 2 additions & 3 deletions .test/config-giab/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,9 @@ ref:

primers:
trimming:
activate: false
# path to fasta files containing primer sequences
primers_fa1: "path/to/primer-fa1"
primers_fa2: "path/to/primer-fa2"
primers_fa1: ""
primers_fa2: ""
# Library mean + error determines the maximum insert size between the outer primer ends.
# Specify 0 to have yara autodetect the primer library insert size error.
library_error: 0
Expand Down
1 change: 0 additions & 1 deletion .test/config-no-candidate-filtering/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ ref:

primers:
trimming:
activate: false
primers_fa1: ""
primers_fa2: ""
library_length: 0
Expand Down
1 change: 0 additions & 1 deletion .test/config-simple/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ ref:

primers:
trimming:
activate: false
primers_fa1: ""
primers_fa2: ""
library_error: 0
Expand Down
1 change: 0 additions & 1 deletion .test/config-sra/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ ref:

primers:
trimming:
activate: false
primers_fa1: ""
primers_fa2: ""
library_length: 0
Expand Down
1 change: 0 additions & 1 deletion .test/config-target-regions/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ ref:

primers:
trimming:
activate: false
primers_fa1: ""
primers_fa2: ""
library_length: 0
Expand Down
1 change: 0 additions & 1 deletion .test/config-target-regions/config_multiple_beds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ ref:

primers:
trimming:
activate: false
primers_fa1: ""
primers_fa2: ""
library_length: 0
Expand Down
1 change: 0 additions & 1 deletion .test/config_primers/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ ref:

primers:
trimming:
activate: true
primers_fa1: "a.scerevisiae.1_primers.fq"
primers_fa2: "a.scerevisiae.2_primers.fq"
library_length: 400
Expand Down
1 change: 1 addition & 0 deletions config/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Defining primers directly in the config file is prefered when all samples come f
In case of different panels, primers have to be set panel-wise in a separate tsv-file.
For each panel the following columns need to be set: `panel`, `fa1` and `fa2` (optional).
Additionally, for each sample the corresponding panel must be defined in `samples.tsv` (column `panel`).
If a panel is not provided for a sample, trimming will not be performed on that sample.
For single primer trimming only, the first entry in the config (or in the tsv file, respectively) needs to be defined.

# Annotating UMIS
Expand Down
9 changes: 5 additions & 4 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,13 @@ ref:
# This is usually only relevant for testing.
# chromosome: 21

# Trimming will be applied if global primer sequences are
# provided or primer panels are set in samplesheet
primers:
trimming:
activate: false
# path to fasta files containg primer sequences
primers_fa1: "path/to/primer-fa1"
primers_fa2: "path/to/primer-fa2"
# path to fasta files containing primer sequences
primers_fa1: ""
primers_fa2: ""
# optional primer file allowing to define primers per sample
# overwrites primers_fa1 and primers_fa2
# the tsv file requires three fields: panel, fa1 and fa2 (optional)
Expand Down
23 changes: 0 additions & 23 deletions workflow/resources/datavzrd/clinical_significance.js

This file was deleted.

19 changes: 10 additions & 9 deletions workflow/resources/datavzrd/variant-calls-template.datavzrd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,8 @@ views:
url: https://www.ensembl.org/Homo_sapiens/Transcript/Summary?t={feature}
'clinical significance':
optional: true
custom: ?read_file(input.clin_sig)
spell:
url: v1.2.1/med/clin-sig
hgvsp:
custom: ?hgvs_content
hgvsc:
Expand Down Expand Up @@ -305,18 +306,18 @@ views:
optional: true
display-mode: detail
spell:
url: v1.1.3/logic/boolean
url: v1.2.1/logic/boolean
with:
true_value: "True"
false_value: ""
false_value: "False"
mane_plus_clinical:
optional: true
display-mode: detail
spell:
url: v1.1.3/logic/boolean
url: v1.2.1/logic/boolean
with:
true_value: "True"
false_value: ""
false_value: "False"
?for alias in params.samples.loc[params.samples["group"] == group, "alias"]:
'?f"{alias}: short observations"':
optional: true
Expand Down Expand Up @@ -476,18 +477,18 @@ views:
optional: true
display-mode: detail
spell:
url: v1.1.3/logic/boolean
url: v1.2.1/logic/boolean
with:
true_value: "True"
false_value: ""
false_value: "False"
mane_plus_clinical:
optional: true
display-mode: detail
spell:
url: v1.1.3/logic/boolean
url: v1.2.1/logic/boolean
with:
true_value: "True"
false_value: ""
false_value: "False"
?for alias in params.samples.loc[params.samples["group"] == group, "alias"]:
'?f"{alias}: short observations"':
optional: true
Expand Down
82 changes: 46 additions & 36 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -260,20 +260,11 @@ def get_control_fdr_input(wildcards):
return "results/final-calls/{group}.{calling_type}.annotated.bcf"


def get_recalibrate_quality_input(wildcards, bai=False):
ext = "bai" if bai else "bam"
datatype = get_sample_datatype(wildcards.sample)
if datatype == "rna":
return "results/split/{{sample}}.{ext}".format(ext=ext)
# Post-processing of DNA samples
if is_activated("calc_consensus_reads"):
return "results/consensus/{{sample}}.{ext}".format(ext=ext)
elif is_activated("primers/trimming"):
return "results/trimmed/{{sample}}.trimmed.{ext}".format(ext=ext)
elif is_activated("remove_duplicates"):
return "results/dedup/{{sample}}.{ext}".format(ext=ext)
def get_aligner(wildcards):
if get_sample_datatype(wildcards.sample) == "rna":
return "star"
else:
return "results/mapped/bwa/{{sample}}.{ext}".format(ext=ext)
return "bwa"


def get_cutadapt_input(wildcards):
Expand Down Expand Up @@ -376,11 +367,6 @@ def is_paired_end(sample):
return all_paired


def group_is_paired_end(group):
    """Return True when is_paired_end() holds for every sample of ``group``.

    The samples are looked up via get_group_samples(); each one is checked
    (no short-circuiting) before the results are combined.
    """
    paired_flags = [is_paired_end(member) for member in get_group_samples(group)]
    return all(paired_flags)


def get_map_reads_input(wildcards):
if is_paired_end(wildcards.sample):
return [
Expand Down Expand Up @@ -428,31 +414,42 @@ def get_sample_datatype(sample):


def get_markduplicates_input(wildcards):
aligner = "star" if get_sample_datatype(wildcards.sample) == "rna" else "bwa"
aligner = get_aligner(wildcards)
if sample_has_umis(wildcards.sample):
return "results/mapped/{aligner}/{{sample}}.annotated.bam".format(
aligner=aligner
)
return f"results/mapped/{aligner}/{{sample}}.annotated.bam"
else:
return f"results/mapped/{aligner}/{{sample}}.bam"


def get_recalibrate_quality_input(wildcards, bai=False):
ext = "bai" if bai else "bam"
datatype = get_sample_datatype(wildcards.sample)
if datatype == "rna":
return "results/split/{{sample}}.{ext}".format(ext=ext)
# Post-processing of DNA samples
if is_activated("calc_consensus_reads"):
return "results/consensus/{{sample}}.{ext}".format(ext=ext)
else:
return "results/mapped/{aligner}/{{sample}}.bam".format(aligner=aligner)
return get_consensus_input(wildcards, bai)


def get_consensus_input(wildcards):
if is_activated("primers/trimming"):
return "results/trimmed/{sample}.trimmed.bam"
elif is_activated("remove_duplicates"):
return "results/dedup/{sample}.bam"
def get_consensus_input(wildcards, bai=False):
ext = "bai" if bai else "bam"
if sample_has_primers(wildcards):
return "results/trimmed/{{sample}}.trimmed.{ext}".format(ext=ext)
else:
aligner = "star" if get_sample_datatype(wildcards.sample) == "rna" else "bwa"
return "results/mapped/{aligner}/{{sample}}.bam".format(aligner=aligner)
return get_trimming_input(wildcards, bai)


def get_trimming_input(wildcards):
def get_trimming_input(wildcards, bai=False):
ext = "bai" if bai else "bam"
if is_activated("remove_duplicates"):
return "results/dedup/{sample}.bam"
return "results/dedup/{{sample}}.{ext}".format(ext=ext)
else:
aligner = "star" if get_sample_datatype(wildcards.sample) == "rna" else "bwa"
return "results/mapped/{aligner}/{{sample}}.bam".format(aligner=aligner)
aligner = get_aligner(wildcards)
return "results/mapped/{aligner}/{{sample}}.{ext}".format(
aligner=aligner, ext=ext
)


def get_primer_bed(wc):
Expand Down Expand Up @@ -549,8 +546,6 @@ def get_markduplicates_extra(wc):

def get_group_bams(wildcards, bai=False):
ext = "bai" if bai else "bam"
if is_activated("primers/trimming") and not group_is_paired_end(wildcards.group):
WorkflowError("Primer trimming is only available for paired end data.")
return expand(
"results/recal/{sample}.{ext}",
sample=get_group_samples(wildcards.group),
Expand Down Expand Up @@ -1373,6 +1368,21 @@ def get_umi_fastq(wildcards):
return umi_read


def sample_has_primers(wildcards):
    """Tell whether primers are defined for the sample named in ``wildcards``.

    Primers count as defined when ``primers_fa1`` in the
    ``primers -> trimming`` config section is set to a truthy value, or
    when the sample sheet has a ``panel`` column holding a non-missing
    entry for this sample.

    Args:
        wildcards: object exposing the sample name as ``wildcards.sample``.

    Returns:
        True if primers are defined for the sample, False otherwise.

    Raises:
        WorkflowError: if primers are defined but is_paired_end() is false
            for the sample.
    """
    sample_name = wildcards.sample

    globally_configured = config["primers"]["trimming"].get("primers_fa1")
    if not globally_configured:
        # No global primers: fall back to the per-sample panel column.
        if "panel" not in samples.columns:
            return False
        panel_entries = samples.loc[samples["sample_name"] == sample_name, "panel"]
        if not panel_entries.notna().any():
            return False

    if is_paired_end(sample_name):
        return True
    raise WorkflowError(
        f"Primer trimming is only available for paired-end data. Sample '{sample_name}' is not paired-end."
    )


def sample_has_umis(sample):
    """Return whether the ``umi_read`` value looked up for ``sample`` is non-missing.

    The value is fetched with extract_unique_sample_column_value() and
    tested with pandas' ``notna``.
    """
    umi_read = extract_unique_sample_column_value(sample, "umi_read")
    return pd.notna(umi_read)

Expand Down
1 change: 0 additions & 1 deletion workflow/rules/datavzrd.smk
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ rule datavzrd_variants_calls:
spec_short_observations=workflow.source_path(
"../resources/datavzrd/spec_short_observations.json"
),
clin_sig=workflow.source_path("../resources/datavzrd/clinical_significance.js"),
data_short_observations=workflow.source_path(
"../resources/datavzrd/data_short_observations.js"
),
Expand Down
2 changes: 0 additions & 2 deletions workflow/rules/mapping.smk
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ rule merge_untrimmed_fastqs:
get_untrimmed_fastqs,
output:
temp("results/untrimmed/{sample}_{read}.fastq.gz"),
conda:
"../envs/fgbio.yaml"
log:
"logs/merge-fastqs/untrimmed/{sample}_{read}.log",
wildcard_constraints:
Expand Down

0 comments on commit 8058ee0

Please sign in to comment.