Commit 6b209c0
Merge pull request #233 from polio-nanopore/haplo_issue
Haplo issue
ZoeVance authored Mar 26, 2024
2 parents 504d3b8 + d3887fe commit 6b209c0
Showing 2 changed files with 38 additions and 10 deletions.
27 changes: 24 additions & 3 deletions piranha/analysis/haplo_functions.py
@@ -11,7 +11,6 @@


 def parse_partition_file(partition_file):
-
     partitions = collections.defaultdict(set)
     with open(partition_file, "r") as f:
         part = ""
@@ -35,6 +34,11 @@ def collapse_close(flopp_file,distance_cutoff,vcf_file):
     final_haplos = []
     vcf = parse_VCF(vcf_file)
     parsed_flopp = parse_flopp_reads(flopp_file)
+    #if flopp wasn't run then the file is empty, parsing gives empty list
+    #need a single partition containing all reads
+    if parsed_flopp == []:
+        return [[0]]
+
     for h in parsed_flopp:
         collapse_into_current = [haplo_pos]
         if haplo_pos in collapsed:
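[Annotation, a hedged sketch rather than code from the commit.] The early return preserves the shape collapse_close's callers expect: a list of merged partition groups, with the flopp-skipped case encoded as a single group holding partition index 0. The same sentinel is recognised in piranha_haplotype.smk further down.

    # hedged sketch: the return shape of collapse_close
    normal_run = [[0, 2], [1]]   # hypothetical merge: partitions 0 and 2 collapsed, 1 kept apart
    flopp_skipped = [[0]]        # sentinel from the new early return: one group, index 0
    assert flopp_skipped[0] == [0] and len(flopp_skipped) == 1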
@@ -176,7 +180,12 @@ def parse_support_col(col):
     total_reads = 0
     base_support = {}
     for base in base_split:
-        base_val,support_reads = base.split(':')
+        try:
+            base_val,support_reads = base.split(':')
+        except ValueError:
+            # -1 in the support col - means zero reads in partition overlap this position
+            base_val = '-1'
+            support_reads = 0
         # if base_val == selbase_val:
         #     base_support = int(support_reads)
         base_support[base_val] = int(support_reads)
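[Annotation.] A hedged, self-contained sketch of the branch above (split_support_entry is an illustrative name, not a function in the commit): a support entry is normally allele:count, but a bare -1 carries no colon, so the unpacking raises ValueError and the sentinel is recorded with zero reads.

    def split_support_entry(entry):
        # a normal entry looks like "0:12"; a bare "-1" means no reads
        # from this partition overlap the position
        try:
            base_val, support_reads = entry.split(':')
        except ValueError:
            base_val = '-1'
            support_reads = 0
        return base_val, int(support_reads)

    assert split_support_entry('0:12') == ('0', 12)
    assert split_support_entry('-1') == ('-1', 0)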
@@ -187,6 +196,9 @@ def check_site_sig(base_support,allele_freqs):
     #check if allele freqs are consistent with random sample from overall reads for this site
     #need to know these are the same order
     #reads might have 100% one allele, need to insert zero in this case, then sort to make sure order correct
+    if '-1' in base_support.keys():
+        # zero read coverage, this site shouldn't be significant
+        return False
     for key in allele_freqs:
         if key not in base_support:
             base_support[key] = 0
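[Annotation.] A hedged illustration of the two steps above (toy numbers, not from the commit): the '-1' guard rules the site out before any statistics run, and the zero-padding makes the count and frequency vectors line up when the partition saw only one allele.

    base_support = {'0': 15}                  # partition reads: 100% one allele
    allele_freqs = {'0': 0.7, '1': 0.2, '2': 0.1}
    if '-1' in base_support:                  # zero coverage: never significant
        significant = False
    else:
        for key in allele_freqs:
            base_support.setdefault(key, 0)   # insert the missing zeros
        counts = [base_support[k] for k in sorted(allele_freqs)]
        freqs = [allele_freqs[k] for k in sorted(allele_freqs)]
        # counts == [15, 0, 0] now lines up with freqs == [0.7, 0.2, 0.1]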
@@ -226,7 +238,16 @@ def get_haplo_dist(h1,h2,parsed_VCF):
 def sum_base_support(dict1,dict2):
     out_dict = {}
     for key in dict1:
-        out_dict[key] = dict1[key] + dict2[key]
+        try:
+            out_dict[key] = dict1[key] + dict2[key]
+        except KeyError:
+            #base not recorded in dict2 - probably a -1 col
+            out_dict[key] = dict1[key]
+    # check for extra keys in other dict
+    for key in dict2:
+        if key not in dict1.keys():
+            out_dict[key] = dict2[key]
+
     return out_dict

 def calc_sd(haplotypes):
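[Annotation.] A hedged usage sketch of the merge above (toy dicts): the except branch keeps keys only dict1 has, and the second loop picks up keys only dict2 has, so a '-1' column on either side no longer raises.

    d1 = {'0': 10, '1': 4}
    d2 = {'0': 3, '-1': 0}
    merged = sum_base_support(d1, d2)
    # merged == {'0': 13, '1': 4, '-1': 0}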
21 changes: 14 additions & 7 deletions piranha/scripts/piranha_haplotype.smk
@@ -85,8 +85,11 @@ rule flopp:
         partition = os.path.join(config[KEY_TEMPDIR],"reference_analysis","{reference}","haplotyping","flopp_partitions","{reference}_part.txt")
     shell:
         """
-        VARIANTS=$(grep -v '#' {input.vcf} | wc -l)
-        if [[ $VARIANTS -gt 0 ]]
+        # || true to stop snakemake catching the exit code 1 grep returns on finding zero lines
+        VARIANTS=$((grep -v '#' {input.vcf} | wc -l) || true)
+        echo "Number of variants"
+        echo $VARIANTS
+        if [[ $VARIANTS -gt 1 ]]
         then
             flopp -b {input.bam:q} \
                 -c {input.vcf:q} \
@@ -95,7 +98,7 @@ rule flopp:
                 -o {output.flopp:q} \
                 -P {params.partition_path}
         else
-            echo "No variants called - single reference haplotype"
+            echo "Less than 2 variants called - haplotyping not possible, one haplotype will be output"
             touch {output.partition:q}
             touch {output.flopp:q}
         fi
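[Annotation.] For reference, a hedged Python equivalent of the shell logic above (variants.vcf is a placeholder path): grep exits 1 when it matches nothing, which Snakemake's strict shell mode treats as a failure, hence the || true; and per the new message, haplotyping needs at least two variant sites, hence -gt 1 instead of -gt 0.

    def count_variants(vcf_path):
        # count non-header lines, mirroring: grep -v '#' file | wc -l
        with open(vcf_path) as f:
            return sum(1 for line in f if not line.startswith('#'))

    run_flopp = count_variants('variants.vcf') > 1   # phasing needs >= 2 sites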
@@ -129,10 +132,14 @@ rule haplotype_qc:
         with open(output.txt,"w") as fhaplo:
             merged_haplo_count = 0
             for subset in merge_info:
-                reads = set()
-                for part in subset:
-                    part_reads = partitions[part]
-                    reads = reads.union(part_reads)
+                if subset == [0] and len(merge_info) == 1:
+                    # flopp wasn't run, single haplo to be called using all reads
+                    reads = [read for read in seq_index]
+                else:
+                    reads = set()
+                    for part in subset:
+                        part_reads = partitions[part]
+                        reads = reads.union(part_reads)

                 if len(reads) > params.min_reads:
                     print(green(f"Haplotype HAP0{merged_haplo_count}:"), len(reads), "reads")
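[Annotation.] A hedged trace of the new branch (seq_index stands for the loaded read index; the toy value is illustrative): the [[0]] sentinel from collapse_close makes merge_info a single group, so every indexed read is assigned to the one haplotype, while a normal run still unions the flopp partitions.

    merge_info = [[0]]                      # sentinel: flopp was skipped
    seq_index = {'read1': None, 'read2': None}
    for subset in merge_info:
        if subset == [0] and len(merge_info) == 1:
            reads = [read for read in seq_index]   # every read, one haplotype
    # reads == ['read1', 'read2']

Note that reads is a list in the fallback branch but a set in the else branch; both support the len() comparison against params.min_reads that follows.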
