Skip to content

Commit

Permalink
Merge pull request #4 from adelq/pnps-dnds
Browse files Browse the repository at this point in the history
Add dN/dS correction and rename existing function to pN/pS
  • Loading branch information
adelq authored Jan 5, 2018
2 parents e62f42b + 7a832f6 commit 0d49a08
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 28 deletions.
9 changes: 6 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ dN/dS Calculator
:target: https://pypi.python.org/pypi/dnds

Calculate dN/dS ratio precisely (Ka/Ks) using a codon-by-codon counting
method.
method. Also calculates the pN/pS ratio precisely (previously referred to as
dN/dS).

Usage
-----
Expand All @@ -17,7 +18,9 @@ Usage
>>> sequence_1 = "ATGCTTTTGAAATCG"
>>> sequence_2 = "ATGCGTTCGAAGTCG"
>>> dnds(sequence_1, sequence2)
>>> pnps(sequence_1, sequence2)
Fraction(38, 71)
>>> round(float(dnds(sequence_1, sequence2)), 3)
>>> round(float(pnps(sequence_1, sequence2)), 3)
0.535
>>> round(dnds(sequence_1, sequence_2), 3)
0.467
79 changes: 58 additions & 21 deletions dnds.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
"""dnds
This module is a reference implementation of estimating nucleotide substitution
neutrality by estimating the percent of synonymous and nonsynonymous mutations.
"""
from __future__ import print_function, division
from codons import codons
from math import log
from fractions import Fraction
import logging
from codons import codons

BASES = {'A', 'G', 'T', 'C'}

Expand Down Expand Up @@ -80,16 +87,16 @@ def codon_subs(codon1, codon2):
return 0
elif diff == 1:
return int(translate(codon1) == translate(codon2))
else:
syn = 0
for i in range(len(codon1)):
base1 = codon1[i]
base2 = codon2[i]
if base1 != base2:
new_codon = codon1[:i] + base2 + codon1[i + 1:]
syn += int(is_synonymous(codon1, new_codon))
syn += int(is_synonymous(codon2, new_codon))
return syn / diff

syn = 0
for i in range(len(codon1)):
base1 = codon1[i]
base2 = codon2[i]
if base1 != base2:
new_codon = codon1[:i] + base2 + codon1[i + 1:]
syn += int(is_synonymous(codon1, new_codon))
syn += int(is_synonymous(codon2, new_codon))
return syn / diff


def substitutions(seq1, seq2):
Expand All @@ -105,27 +112,57 @@ def substitutions(seq1, seq2):
return (syn, dna_changes - syn)


def clean_sequence(seq):
"""Clean up provided sequence by removing whitespace."""
return seq.replace(' ', '')


def pnps(seq1, seq2):
"""Main function to calculate pN/pS between two DNA sequences."""
# Strip any whitespace from both strings
seq1 = clean_sequence(seq1)
seq2 = clean_sequence(seq2)
# Check that both sequences have the same length
assert len(seq1) == len(seq2)
# Check that sequences are codons
assert len(seq1) % 3 == 0

syn_sites = syn_sum(seq1, seq2)
non_sites = len(seq1) - syn_sites
logging.info('Sites (syn/nonsyn): {}, {}'.format(syn_sites, non_sites))
syn_subs, non_subs = substitutions(seq1, seq2)
logging.info('pN: {} / {}\t\tpS: {} / {}'
.format(non_subs, round(non_sites), syn_subs, round(syn_sites)))
pn = non_subs / non_sites
ps = syn_subs / syn_sites
return pn / ps


def dnds(seq1, seq2):
"""Main function to calculate dN/dS between two DNA sequences"""
"""Main function to calculate dN/dS between two DNA sequences per Nei &
Gojobori 1986. This includes the per site conversion adapted from Jukes &
Cantor 1967.
"""
# Strip any whitespace from both strings
seq1 = seq1.replace(' ', '')
seq2 = seq2.replace(' ', '')
seq1 = clean_sequence(seq1)
seq2 = clean_sequence(seq2)
# Check that both sequences have the same length
assert len(seq1) == len(seq2)
# Check that sequences are codons
assert len(seq1) % 3 == 0
assert len(seq2) % 3 == 0

syn_sites = syn_sum(seq1, seq2)
non_sites = len(seq1) - syn_sites
# print(syn_sites, non_sites)
logging.info('Sites (syn/nonsyn): {}, {}'.format(syn_sites, non_sites))
syn_subs, non_subs = substitutions(seq1, seq2)
# print('dN: {} / {}\t\tdS: {} / {}'
# .format(non_subs, round(non_sites), syn_subs, round(syn_sites)))
dn = non_subs / non_sites
ds = syn_subs / syn_sites
pn = non_subs / non_sites
ps = syn_subs / syn_sites
dn = -(3 / 4) * log(1 - (4 * pn / 3))
ds = -(3 / 4) * log(1 - (4 * ps / 3))
logging.info('dN: {}\t\tdS: {}'.format(round(dn, 3), round(ds, 3)))
return dn / ds


if __name__ == '__main__':
print(dnds('ACC GTG GGA TGC ACC GGT GTG CCC',
print(pnps('ACC GTG GGA TGC ACC GGT GTG CCC',
'ACA GTG AGA TAT AAA GGA GAG AAC'))
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
version=version,
description="Calculate dN/dS ratio precisely (Ka/Ks) using a codon-by-codon counting method.",
author="Adel Qalieh",
author_email="adelq@sas.upenn.edu",
author_email="adelq@med.umich.edu",
url="https://github.com/adelq/dnds",
license="MIT",
py_modules=['dnds', 'codons'],
Expand All @@ -22,4 +22,5 @@
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
])
15 changes: 12 additions & 3 deletions test_dnds.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
from dnds import dnds, substitutions, dnds_codon, dnds_codon_pair, syn_sum, translate
from __future__ import division
from fractions import Fraction
from nose.tools import assert_equal, assert_almost_equal
from dnds import dnds, pnps, substitutions, dnds_codon, dnds_codon_pair, syn_sum, translate

# From Canvas practice problem
TEST_SEQ1 = 'ACTCCGAACGGGGCGTTAGAGTTGAAACCCGTTAGA'
TEST_SEQ2 = 'ACGCCGATCGGCGCGATAGGGTTCAAGCTCGTACGA'
# From in-class problem set
TEST_SEQ3 = 'ATGCTTTTGAAATCGATCGTTCGTTCACATCGATGGATC'
TEST_SEQ4 = 'ATGCGTTCGAAGTCGATCGATCGCTCAGATCGATCGATC'
# From http://bioinformatics.cvr.ac.uk/blog/calculating-dnds-for-ngs-datasets/
TEST_SEQ5 = 'ATGAAACCCGGGTTTTAA'
TEST_SEQ6 = 'ATGAAACGCGGCTACTAA'


def test_translate():
Expand Down Expand Up @@ -77,9 +81,14 @@ def test_syn_subs():
assert_equal(substitutions("CCC", "AAC"), (0, 2))


def test_pnps():
assert_almost_equal(pnps(TEST_SEQ1, TEST_SEQ2), 0.269, delta=0.1)
assert_almost_equal(pnps(TEST_SEQ3, TEST_SEQ4), 0.86, delta=0.1)
assert_almost_equal(pnps(TEST_SEQ5, TEST_SEQ6), 0.1364 / 0.6001, delta=1e-4)


def test_dnds():
assert_almost_equal(dnds(TEST_SEQ1, TEST_SEQ2), 0.269, delta=0.1)
assert_almost_equal(dnds(TEST_SEQ3, TEST_SEQ4), 0.86, delta=0.1)
assert_almost_equal(dnds(TEST_SEQ5, TEST_SEQ6), 0.1247, delta=1e-4)

# DNDS.pdf - wrong based on email conversation with Sean
#
Expand Down

0 comments on commit 0d49a08

Please sign in to comment.