Skip to content

Commit

Permalink
Merge pull request #73 from hall-lab/dup_improvements
Browse files Browse the repository at this point in the history
Genotyping calculation improvements
  • Loading branch information
ernfrid authored Jan 19, 2018
2 parents 376bd1d + 8ed6810 commit a9eb890
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 94 deletions.
8 changes: 3 additions & 5 deletions svtyper/classic.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,8 @@ def sv_genotype(bam_string,
QR = int(split_weight * ref_seq) + int(disc_weight * ref_span)
QA = int(split_weight * alt_splitters) + int(disc_weight * alt_span)
gt_lplist = bayes_gt(QR, QA, is_dup)
gt_idx = gt_lplist.index(max(gt_lplist))
best, second_best = sorted([ (i, e) for i, e in enumerate(gt_lplist) ], key=lambda(x): x[1], reverse=True)[0:2]
gt_idx = best[0]

# print log probabilities of homref, het, homalt
if debug:
Expand Down Expand Up @@ -468,10 +469,7 @@ def sv_genotype(bam_string,
if gt_sum > 0:
gt_sum_log = math.log(gt_sum, 10)
sample_qual = abs(-10 * (gt_lplist[0] - gt_sum_log)) # phred-scaled probability site is non-reference in this sample
if 1 - (10**gt_lplist[gt_idx] / 10**gt_sum_log) == 0:
phred_gq = 200
else:
phred_gq = abs(-10 * math.log(1 - (10**gt_lplist[gt_idx] / 10**gt_sum_log), 10))
phred_gq = min(-10 * (second_best[1] - best[1]), 200)
var.genotype(sample.name).set_format('GQ', int(phred_gq))
var.genotype(sample.name).set_format('SQ', sample_qual)
var.qual += sample_qual
Expand Down
8 changes: 3 additions & 5 deletions svtyper/singlesample.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,8 @@ def bayesian_genotype(breakpoint, counts, split_weight, disc_weight, debug):

# the actual bayesian calculation and decision
gt_lplist = bayes_gt(QR, QA, is_dup)
gt_idx = gt_lplist.index(max(gt_lplist))
best, second_best = sorted([ (i, e) for i, e in enumerate(gt_lplist) ], key=lambda(x): x[1], reverse=True)[0:2]
gt_idx = best[0]

# print log probabilities of homref, het, homalt
if debug:
Expand Down Expand Up @@ -450,10 +451,7 @@ def bayesian_genotype(breakpoint, counts, split_weight, disc_weight, debug):
if gt_sum > 0:
gt_sum_log = math.log(gt_sum, 10)
sample_qual = abs(-10 * (gt_lplist[0] - gt_sum_log)) # phred-scaled probability site is non-reference in this sample
if 1 - (10**gt_lplist[gt_idx] / 10**gt_sum_log) == 0:
phred_gq = 200
else:
phred_gq = abs(-10 * math.log(1 - (10**gt_lplist[gt_idx] / 10**gt_sum_log), 10))
phred_gq = min(-10 * (second_best[1] - best[1]), 200)
result['formats']['GQ'] = int(phred_gq)
result['formats']['SQ'] = sample_qual
result['qual'] += sample_qual
Expand Down
2 changes: 1 addition & 1 deletion svtyper/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def log_choose(n, k):
def bayes_gt(ref, alt, is_dup):
# probability of seeing an alt read with true genotype of of hom_ref, het, hom_alt respectively
if is_dup: # specialized logic to handle non-destructive events such as duplications
p_alt = [1e-2, 1/3.0, 0.5]
p_alt = [1e-2, 0.2, 1/3.0]
else:
p_alt = [1e-3, 0.5, 0.9]

Expand Down
2 changes: 1 addition & 1 deletion svtyper/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__author__ = "Colby Chiang ([email protected])"
__version__ = "v0.4.0"
__version__ = "v0.5.0"
Loading

0 comments on commit a9eb890

Please sign in to comment.