diff --git a/.test/config-chm-eval/config.yaml b/.test/config-chm-eval/config.yaml index a3219ad1c..a8bb79ca5 100644 --- a/.test/config-chm-eval/config.yaml +++ b/.test/config-chm-eval/config.yaml @@ -184,3 +184,8 @@ report: stratify: activate: false by-column: condition + +custom_alignment_properties: + activate: false + column: "panel" + tsv: "" \ No newline at end of file diff --git a/.test/config-giab/config.yaml b/.test/config-giab/config.yaml index 10e9c94d8..6742e6e7b 100644 --- a/.test/config-giab/config.yaml +++ b/.test/config-giab/config.yaml @@ -157,3 +157,8 @@ params: gene_coverage: min_avg_coverage: 5 + +custom_alignment_properties: + activate: false + column: "panel" + tsv: "" \ No newline at end of file diff --git a/.test/config-no-candidate-filtering/config.yaml b/.test/config-no-candidate-filtering/config.yaml index 897a5a00e..c77b75259 100644 --- a/.test/config-no-candidate-filtering/config.yaml +++ b/.test/config-no-candidate-filtering/config.yaml @@ -123,3 +123,8 @@ report: stratify: activate: false by-column: condition + +custom_alignment_properties: + activate: false + column: "panel" + tsv: "" \ No newline at end of file diff --git a/.test/config-simple/config.yaml b/.test/config-simple/config.yaml index de1d124c2..0e0abc552 100644 --- a/.test/config-simple/config.yaml +++ b/.test/config-simple/config.yaml @@ -127,3 +127,8 @@ tables: coverage: true event_prob: true generate_excel: true + +custom_alignment_properties: + activate: false + column: "panel" + tsv: "" \ No newline at end of file diff --git a/.test/config-sra/config.yaml b/.test/config-sra/config.yaml index 22751b786..21df0691d 100644 --- a/.test/config-sra/config.yaml +++ b/.test/config-sra/config.yaml @@ -112,4 +112,9 @@ tables: genotype: true coverage: true event_prob: true - generate_excel: true \ No newline at end of file + generate_excel: true + +custom_alignment_properties: + activate: false + column: "panel" + tsv: "" \ No newline at end of file diff --git 
a/.test/config-target-regions/config.yaml b/.test/config-target-regions/config.yaml index e1172464d..eb0290d41 100644 --- a/.test/config-target-regions/config.yaml +++ b/.test/config-target-regions/config.yaml @@ -126,3 +126,8 @@ tables: coverage: true event_prob: true generate_excel: true + +custom_alignment_properties: + activate: false + column: "panel" + tsv: "" \ No newline at end of file diff --git a/.test/config-target-regions/config_multiple_beds.yaml b/.test/config-target-regions/config_multiple_beds.yaml index 660845052..ab83cc2ba 100644 --- a/.test/config-target-regions/config_multiple_beds.yaml +++ b/.test/config-target-regions/config_multiple_beds.yaml @@ -124,3 +124,8 @@ tables: coverage: true event_prob: true generate_excel: true + +custom_alignment_properties: + activate: false + column: "panel" + tsv: "" \ No newline at end of file diff --git a/.test/config_primers/config.yaml b/.test/config_primers/config.yaml index c2921ba8d..15ad0df3d 100644 --- a/.test/config_primers/config.yaml +++ b/.test/config_primers/config.yaml @@ -115,4 +115,9 @@ tables: genotype: true coverage: true event_prob: true - generate_excel: true \ No newline at end of file + generate_excel: true + +custom_alignment_properties: + activate: false + column: "panel" + tsv: "" \ No newline at end of file diff --git a/config/alignment_properties.tsv b/config/alignment_properties.tsv new file mode 100644 index 000000000..7ef0d7401 --- /dev/null +++ b/config/alignment_properties.tsv @@ -0,0 +1 @@ +name path \ No newline at end of file diff --git a/config/config.yaml b/config/config.yaml index f1deaf493..3eee881b3 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -284,3 +284,12 @@ params: freebayes: min_alternate_fraction: 0.05 # Reduce for calling variants with lower VAFs extra: "" + +# If activated preprocessed alignment properties can be applied to each sample individually. 
+# Paths to the alignment properties json files need to be set in a tsv file containing a property name and path. +# Alignment properties names will be taken from a customizable column in the sample sheet. +# If no property name is set for a sample or custom_alignment_properties is deactivated, the alignment properties will be estimated on the sample's read alignments. +custom_alignment_properties: + activate: false + column: "panel" + tsv: "config/alignment_properties.tsv" \ No newline at end of file diff --git a/workflow/rules/calling.smk b/workflow/rules/calling.smk index e1f8c15de..1f3e49800 100644 --- a/workflow/rules/calling.smk +++ b/workflow/rules/calling.smk @@ -47,7 +47,7 @@ rule varlociraptor_preprocess: candidates=get_candidate_calls, bam="results/recal/{sample}.bam", bai="results/recal/{sample}.bai", - alignment_props="results/alignment-properties/{group}/{sample}.json", + alignment_props=get_alignment_props, output: "results/observations/{group}/{sample}.{caller}.{scatteritem}.bcf", params: diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 662278dc7..3848f0066 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -112,6 +112,29 @@ primer_panels = ( ) +def is_activated(xpath): + c = config + for entry in xpath.split("/"): + c = c.get(entry, {}) + return bool(c.get("activate", False)) + + +custom_alignment_props = ( + ( + pd.read_csv( + config["custom_alignment_properties"]["tsv"], + sep="\t", + dtype={"name": str, "path": str}, + comment="#", + ) + .set_index(["name"], drop=False) + .sort_index() + ) + if is_activated("custom_alignment_properties") + else None +) + + def get_calling_events(calling_type): events = [ event @@ -606,13 +629,6 @@ def get_all_group_observations(wildcards): ) -def is_activated(xpath): - c = config - for entry in xpath.split("/"): - c = c.get(entry, {}) - return bool(c.get("activate", False)) - - def get_star_read_group(wildcards): """Denote sample name and platform in read 
group.""" platform = extract_unique_sample_column_value(wildcards.sample, "platform") @@ -1591,3 +1607,14 @@ def get_delly_excluded_regions(): ) else: return [] + + +def get_alignment_props(wildcards): + if is_activated("custom_alignment_properties"): + alignment_prop_column = config["custom_alignment_properties"]["column"] + prop_name = extract_unique_sample_column_value( + wildcards.sample, alignment_prop_column + ) + if pd.notna(prop_name): + return custom_alignment_props.loc[prop_name, "path"] + return f"results/alignment-properties/{wildcards.group}/{wildcards.sample}.json" diff --git a/workflow/rules/mapping.smk b/workflow/rules/mapping.smk index fab5df796..24608dd59 100644 --- a/workflow/rules/mapping.smk +++ b/workflow/rules/mapping.smk @@ -15,8 +15,6 @@ rule map_reads_bwa: "v3.8.0/bio/bwa/mem" -# Create distance and minimizer index before mapping -# Otherwise it will be performed on the first execution leading to race conditions for multiple samples rule map_reads_vg: input: reads=get_map_reads_input, @@ -31,7 +29,7 @@ rule map_reads_vg: extra="", sorting="fgbio", sort_order="queryname", - threads: 8 + threads: 64 wrapper: "v5.3.0/bio/vg/giraffe" diff --git a/workflow/schemas/config.schema.yaml b/workflow/schemas/config.schema.yaml index e86231b0a..0e162f24d 100644 --- a/workflow/schemas/config.schema.yaml +++ b/workflow/schemas/config.schema.yaml @@ -347,6 +347,17 @@ properties: - freebayes - varlociraptor + custom_alignment_properties: + type: object + properties: + activate: + type: boolean + column: + type: string + tsv: + type: string + + required: - samples - units @@ -355,3 +366,4 @@ required: - calling - params - annotations + - custom_alignment_properties