Skip to content

Commit

Permalink
Added support for HLA genotyping on both DNA and RNA
Browse files Browse the repository at this point in the history
  • Loading branch information
riasc committed Aug 9, 2023
1 parent 3fce192 commit b3c9f7c
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 3 deletions.
7 changes: 7 additions & 0 deletions workflow/envs/optitype.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
channels:
- conda-forge
- bioconda
- nodefaults
dependencies:
- optitype =1.3.5
- pysam
13 changes: 13 additions & 0 deletions workflow/envs/transindel.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
channels:
- bioconda
- conda-forge
- defaults
- r
dependencies:
- samtools
- pysam
- htseq
- pyfaidx
- pip
- pip:
- vcfpy==0.13.6
5 changes: 5 additions & 0 deletions workflow/envs/yara.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
channels:
- bioconda
dependencies:
- yara
- samtools
25 changes: 22 additions & 3 deletions workflow/rules/hlatyping.smk
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ if config['data']['rnaseq'] is not None:
rule get_hla_filtering_input_single_RNA:
input:
get_hla_flt_rna_se,
dna=multiext("resources/hla/yara/idx/dna",
dna=multiext("resources/hla/yara/idx/rna",
".lf.drp", ".lf.drs", ".lf.drv",
".lf.pst", ".rid.concat", ".rid.limits",
".sa.ind", ".sa.len", ".sa.val",
Expand Down Expand Up @@ -131,7 +131,7 @@ if config['data']['rnaseq'] is not None:
rule get_hla_filtering_input_paired_RNA:
input:
get_hla_flt_rna_pe,
dna=multiext("resources/hla/yara/idx/dna",
dna=multiext("resources/hla/yara/idx/rna",
".lf.drp", ".lf.drs", ".lf.drv",
".lf.pst", ".rid.concat", ".rid.limits",
".sa.ind", ".sa.len", ".sa.val",
Expand Down Expand Up @@ -187,9 +187,28 @@ rule hla_genotyping_RNA:
"""
OptiTypePipeline.py --input {input} \
--outdir results/{wildcards.sample}/hla/ \
--prefix {wildcards.group}_dna --dna > {log}
--prefix {wildcards.group}_rna --rna > {log}
"""


# Merge the HLA allele tables of one sample into a single file,
# results/{sample}/hla/alleles.tsv, using workflow/scripts/merge_alleles.py.
rule merge_alleles:
input:
# get_alleles is an input function defined outside this section; presumably it
# returns the per-group DNA and/or RNA genotyping result files -- TODO confirm.
get_alleles,
output:
"results/{sample}/hla/alleles.tsv"
message:
"Merging the detected alleles in {wildcards.sample}"
log:
"logs/{sample}/hla/merge_alleles"
conda:
"../envs/basic.yml"
# '{input}' is single-quoted on purpose: every input path is handed to the
# script as ONE space-separated argument, which the script splits again.
# Only stderr (2>) is redirected to the log file.
shell:
"""
python3 workflow/scripts/merge_alleles.py '{input}' \
{output} 2> {log}
"""





41 changes: 41 additions & 0 deletions workflow/scripts/merge_alleles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import sys

def main():
    """Merge HLA allele calls from several result tables into one table.

    Command-line arguments (read from ``sys.argv``):
        1. A whitespace-separated list of input TSV files, passed as a
           single argument (presumably OptiType result tables -- confirm
           against the calling rule).
        2. Path of the merged output table.

    Every input file is expected to start with one header line, followed by
    rows whose columns 1-6 hold the A1, A2, B1, B2, C1 and C2 calls
    (column 0 is ignored). The output contains one line per locus: the
    locus name followed by the distinct alleles seen for it across all
    inputs, in order of first appearance, tab-separated.

    Raises:
        ValueError: if a non-blank data row has fewer than seven columns.
    """
    loci = ('A1', 'A2', 'B1', 'B2', 'C1', 'C2')
    alleles = {locus: [] for locus in loci}

    # split() without an argument ignores repeated, leading and trailing
    # blanks, so no empty "file names" are produced.
    for alfile in sys.argv[1].split():
        with open(alfile, 'r') as f:
            # Skip the header; the default keeps an empty file from raising
            # StopIteration.
            next(f, None)
            for lineno, line in enumerate(f, start=2):
                fields = line.rstrip().split('\t')
                if fields == ['']:
                    # Blank (e.g. trailing) line: nothing to merge.
                    continue
                if len(fields) <= len(loci):
                    raise ValueError(
                        '{}:{}: expected at least {} tab-separated columns, '
                        'found {}'.format(
                            alfile, lineno, len(loci) + 1, len(fields)
                        )
                    )
                # Column 0 is skipped; zip() stops after the six loci, so
                # any further columns are ignored.
                for locus, allele in zip(loci, fields[1:]):
                    if allele not in alleles[locus]:
                        alleles[locus].append(allele)

    # The context manager guarantees the file is flushed and closed even if
    # a write fails.
    with open(sys.argv[2], 'w') as output:
        for locus in loci:
            output.write(locus + '\t' + '\t'.join(alleles[locus]) + '\n')



# Run only when executed as a script, so that importing this module
# (e.g. from a test) does not trigger file I/O on sys.argv.
if __name__ == "__main__":
    main()

0 comments on commit b3c9f7c

Please sign in to comment.