From 6933222307c3501fbfa6c1576a749bfe7d9b2b81 Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Mon, 25 Oct 2021 13:53:37 -0700 Subject: [PATCH 1/8] added functionality for computing expansion probabilities --- cassiopeia/tools/topology.py | 100 +++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 cassiopeia/tools/topology.py diff --git a/cassiopeia/tools/topology.py b/cassiopeia/tools/topology.py new file mode 100644 index 00000000..5ab24344 --- /dev/null +++ b/cassiopeia/tools/topology.py @@ -0,0 +1,100 @@ +""" +Utilities to assess topological properties of a phylogeny, such as balance +and expansion. +""" +from typing import Union + +import math +import numpy as np +import pandas as pd + +from cassiopeia.data import CassiopeiaTree + + +def compute_expansion_probabilities( + tree: CassiopeiaTree, + min_clade_size: int = 10, + min_depth: int = 1, + copy=True, +) -> Union[CassiopeiaTree, None]: + """Call expansions on a tree. + + Uses the methodology described in Yang, Jones et al, BioRxiv (2021) to + assess the expansion probability of a given subclade of a phylogeny. + + This function will add an attribute "expansion_probability" to the tree. + + Args: + tree: CassiopeiaTree + min_clade_size: Minimum number of leaves in a subtree to be considered. + min_depth: Minimum depth of clade to be considered. + copy: Return copy. + + Returns: + None. Adds attributes to the tree. 
+ """ + + tree = tree.copy() if copy else tree + + # instantiate attributes + tree.set_attribute(tree.root, "depth", 0) + for node in tree.depth_first_traverse_nodes(postorder=False): + tree.set_attribute(node, "expansion_probability", 1.0) + tree.set_attribute( + node, "depth", tree.get_attribute(tree.parent(node, "depth")) + 1 + ) + + for node in tree.depth_first_traverse_nodes(postorder=False): + + n = len(tree.leaves_in_subtree(node)) + depth = tree.get_attribute(node, "depth") + if depth >= min_depth: + + k = len(tree.children(node)) + for c in tree.children(node): + + if len(tree.leaves_in_subtree(c)) < min_clade_size: + continue + + b = len(tree.leaves_in_subtree(c)) + p = np.sum( + [ + simple_coalescent_probability(n, b2, k) + for b2 in range(b, n - k + 2) + ] + ) + tree.set_attribute(c, "expansion_probability", p) + + return tree if copy else None + + +def simple_coalescent_probability(n: int, b: int, k: int) -> float: + """Simple coalescent probability of imbalance. + + Assuming a simple coalescent model, compute the probability that a given + lineage has exactly b samples, given there are n cells and k lineages + overall. + + Args: + n: Number of leaves in subtree + b: Number of leaves in one lineage + k: Number of lineages + Returns: + Probability of observing b leaves on one lineage in a tree of n total + leaves + """ + return nCr(n - b - 1, k - 2) / nCr(n - 1, k - 1) + + +def nCr(n: int, k: int) -> float: + """Compute the quantity n choose k. + + Args: + n: Number of items total. + k: Number of items to choose. + + Returns: + The number of ways to choose k items from n. 
+ """ + f = math.factorial + return f(n) // f(k) // f(n - k) From ef6eace57b503b0323585512584178e218cad108 Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Mon, 25 Oct 2021 15:02:42 -0700 Subject: [PATCH 2/8] added tests for computing expansion proportions --- cassiopeia/tools/__init__.py | 1 + cassiopeia/tools/topology.py | 36 ++++++++--- test/tools_tests/topology_test.py | 103 ++++++++++++++++++++++++++++++ 3 files changed, 130 insertions(+), 10 deletions(-) create mode 100644 test/tools_tests/topology_test.py diff --git a/cassiopeia/tools/__init__.py b/cassiopeia/tools/__init__.py index e4549f00..a099d222 100644 --- a/cassiopeia/tools/__init__.py +++ b/cassiopeia/tools/__init__.py @@ -3,3 +3,4 @@ from .autocorrelation import compute_morans_i from .branch_length_estimator import IIDExponentialBayesian, IIDExponentialMLE from .small_parsimony import fitch_count, fitch_hartigan, score_small_parsimony +from .topology import compute_expansion_probabilities \ No newline at end of file diff --git a/cassiopeia/tools/topology.py b/cassiopeia/tools/topology.py index 5ab24344..41676e94 100644 --- a/cassiopeia/tools/topology.py +++ b/cassiopeia/tools/topology.py @@ -9,25 +9,34 @@ import pandas as pd from cassiopeia.data import CassiopeiaTree +from cassiopeia.mixins import CassiopeiaError def compute_expansion_probabilities( tree: CassiopeiaTree, min_clade_size: int = 10, min_depth: int = 1, - copy=True, + copy: bool = False, ) -> Union[CassiopeiaTree, None]: - """Call expansions on a tree. + """Call expansion probabilities on a tree. Uses the methodology described in Yang, Jones et al, BioRxiv (2021) to assess the expansion probability of a given subclade of a phylogeny. - This function will add an attribute "expansion_probability" to the tree. + The probability corresponds to the probability that a given subclade + contains the number of cells as would be expected under a simple coalescent + model. 
Often, if the probability is less than some threshold (e.g., 0.05), + this might indicate that there exists some subclade under this node that + to which this expansion probability can be attributed. + + This function will add an attribute "expansion_probability" to the tree, and + return None unless :param:`copy` is set to True. Args: tree: CassiopeiaTree min_clade_size: Minimum number of leaves in a subtree to be considered. - min_depth: Minimum depth of clade to be considered. + min_depth: Minimum depth of clade to be considered. Depth is measured + in number of nodes from the root, not branch lengths. copy: Return copy. Returns: @@ -37,12 +46,15 @@ def compute_expansion_probabilities( tree = tree.copy() if copy else tree # instantiate attributes - tree.set_attribute(tree.root, "depth", 0) for node in tree.depth_first_traverse_nodes(postorder=False): tree.set_attribute(node, "expansion_probability", 1.0) - tree.set_attribute( - node, "depth", tree.get_attribute(tree.parent(node, "depth")) + 1 - ) + + if tree.is_root(node): + tree.set_attribute(node, "depth", 0) + else: + tree.set_attribute( + node, "depth", tree.get_attribute(tree.parent(node), "depth") + 1 + ) for node in tree.depth_first_traverse_nodes(postorder=False): @@ -83,10 +95,10 @@ def simple_coalescent_probability(n: int, b: int, k: int) -> float: Probability of observing b leaves on one lineage in a tree of n total leaves """ - return nCr(n - b - 1, k - 2) / nCr(n - 1, k - 1) + return nCk(n - b - 1, k - 2) / nCk(n - 1, k - 1) -def nCr(n: int, k: int) -> float: +def nCk(n: int, k: int) -> float: """Compute the quantity n choose k. Args: @@ -96,5 +108,9 @@ def nCr(n: int, k: int) -> float: Returns: The number of ways to choose k items from n. 
""" + + if k > n: + raise CassiopeiaError("Argument k cannot be larger than n.") + f = math.factorial return f(n) // f(k) // f(n - k) diff --git a/test/tools_tests/topology_test.py b/test/tools_tests/topology_test.py new file mode 100644 index 00000000..815431f6 --- /dev/null +++ b/test/tools_tests/topology_test.py @@ -0,0 +1,103 @@ +""" +Test suite for the topology functions in +cassiopeia/tools/topology.py +""" +import unittest + +import networkx as nx +import numpy as np +import pandas as pd + +import cassiopeia as cas +from cassiopeia.mixins import CassiopeiaError +from cassiopeia.tools import topology + + +class TestTopology(unittest.TestCase): + def setUp(self) -> None: + + tree = nx.DiGraph() + tree.add_edge("0", "1") + tree.add_edge("0", "2") + tree.add_edge("1", "3") + tree.add_edge("1", "4") + tree.add_edge("1", "5") + tree.add_edge("2", "6") + tree.add_edge("2", "7") + tree.add_edge("3", "8") + tree.add_edge("3", "9") + tree.add_edge("7", "10") + tree.add_edge("7", "11") + tree.add_edge("8", "12") + tree.add_edge("8", "13") + tree.add_edge("9", "14") + tree.add_edge("9", "15") + tree.add_edge("3", "16") + tree.add_edge("16", "17") + tree.add_edge("16", "18") + + self.tree = cas.data.CassiopeiaTree(tree=tree) + + def test_simple_choose_function(self): + + num_choices = topology.nCk(10, 2) + + self.assertEqual(num_choices, 45) + + self.assertRaises(CassiopeiaError, topology.nCk, 5, 7) + + def test_simple_coalescent_probability(self): + + N = 100 + B = 2 + K = 60 + coalescent_probability = topology.simple_coalescent_probability(N, B, K) + self.assertAlmostEqual(coalescent_probability, 0.24, delta=0.01) + + self.assertRaises( + CassiopeiaError, topology.simple_coalescent_probability, 50, 2, 60 + ) + + def test_expansion_probability(self): + + # make sure attributes are instantiated correctly + cas.tl.compute_expansion_probabilities(self.tree, min_clade_size=20) + for node in self.tree.depth_first_traverse_nodes(postorder=False): + self.assertEqual( + 1.0, 
self.tree.get_attribute(node, "expansion_probability") + ) + + cas.tl.compute_expansion_probabilities(self.tree, min_clade_size=2) + expected_probabilities = { + "0": 1.0, + "1": 1.0, + "2": 1.0, + "3": 0.047, + "4": 1.0, + "5": 1.0, + "6": 1.0, + "7": 0.5, + "8": 0.6, + "9": 0.6, + "10": 1.0, + "11": 1.0, + "12": 1.0, + "13": 1.0, + "14": 1.0, + "15": 1.0, + "16": 0.6, + "17": 1.0, + "18": 1.0, + } + + for node in self.tree.depth_first_traverse_nodes(postorder=False): + expected = expected_probabilities[node] + self.assertAlmostEqual( + expected, + self.tree.get_attribute(node, "expansion_probability"), + delta=0.01, + ) + + +if __name__ == "__main__": + unittest.main() From 57bbea9d081b55c44826364a978b8f3bf4d8c990 Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Mon, 25 Oct 2021 15:08:16 -0700 Subject: [PATCH 3/8] updated literature citation in documentation --- cassiopeia/tools/topology.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cassiopeia/tools/topology.py b/cassiopeia/tools/topology.py index 41676e94..5bf52f43 100644 --- a/cassiopeia/tools/topology.py +++ b/cassiopeia/tools/topology.py @@ -22,6 +22,8 @@ def compute_expansion_probabilities( Uses the methodology described in Yang, Jones et al, BioRxiv (2021) to assess the expansion probability of a given subclade of a phylogeny. + Mathematical treatment of the coalescent probability is described in + Griffiths and Tavare, Stochastic Models (1998). 
The probability corresponds to the probability that a given subclade contains the number of cells as would be expected under a simple coalescent From 784fc5b8f33f727f4d53bab24d1aa953d04c542b Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Mon, 25 Oct 2021 15:31:09 -0700 Subject: [PATCH 4/8] added more tests, fixed logic in expansion_probs --- cassiopeia/tools/topology.py | 36 +++++++------- test/tools_tests/topology_test.py | 83 +++++++++++++++++++++++++++++-- 2 files changed, 99 insertions(+), 20 deletions(-) diff --git a/cassiopeia/tools/topology.py b/cassiopeia/tools/topology.py index 5bf52f43..4afe6c83 100644 --- a/cassiopeia/tools/topology.py +++ b/cassiopeia/tools/topology.py @@ -61,23 +61,25 @@ def compute_expansion_probabilities( for node in tree.depth_first_traverse_nodes(postorder=False): n = len(tree.leaves_in_subtree(node)) - depth = tree.get_attribute(node, "depth") - if depth >= min_depth: - - k = len(tree.children(node)) - for c in tree.children(node): - - if len(tree.leaves_in_subtree(c)) < min_clade_size: - continue - - b = len(tree.leaves_in_subtree(c)) - p = np.sum( - [ - simple_coalescent_probability(n, b2, k) - for b2 in range(b, n - k + 2) - ] - ) - tree.set_attribute(c, "expansion_probability", p) + + k = len(tree.children(node)) + for c in tree.children(node): + + if len(tree.leaves_in_subtree(c)) < min_clade_size: + continue + + depth = tree.get_attribute(c, "depth") + if depth < min_depth: + continue + + b = len(tree.leaves_in_subtree(c)) + p = np.sum( + [ + simple_coalescent_probability(n, b2, k) + for b2 in range(b, n - k + 2) + ] + ) + tree.set_attribute(c, "expansion_probability", p) return tree if copy else None diff --git a/test/tools_tests/topology_test.py b/test/tools_tests/topology_test.py index 815431f6..763250e2 100644 --- a/test/tools_tests/topology_test.py +++ b/test/tools_tests/topology_test.py @@ -9,7 +9,7 @@ import pandas as pd import cassiopeia as cas -from cassiopeia.mixins import CassiopeiaError +from 
cassiopeia.mixins import CassiopeiaError, CassiopeiaTreeError from cassiopeia.tools import topology @@ -70,8 +70,8 @@ def test_expansion_probability(self): cas.tl.compute_expansion_probabilities(self.tree, min_clade_size=2) expected_probabilities = { "0": 1.0, - "1": 1.0, - "2": 1.0, + "1": 0.3, + "2": 0.8, "3": 0.047, "4": 1.0, "5": 1.0, @@ -98,6 +98,83 @@ def test_expansion_probability(self): delta=0.01, ) + def test_expansion_probability_variable_depths(self): + + cas.tl.compute_expansion_probabilities(self.tree, min_clade_size=2, min_depth=3) + expected_probabilities = { + "0": 1.0, + "1": 1.0, + "2": 1.0, + "3": 1.0, + "4": 1.0, + "5": 1.0, + "6": 1.0, + "7": 1.0, + "8": 0.6, + "9": 0.6, + "10": 1.0, + "11": 1.0, + "12": 1.0, + "13": 1.0, + "14": 1.0, + "15": 1.0, + "16": 0.6, + "17": 1.0, + "18": 1.0, + } + + for node in self.tree.depth_first_traverse_nodes(postorder=False): + expected = expected_probabilities[node] + self.assertAlmostEqual( + expected, + self.tree.get_attribute(node, "expansion_probability"), + delta=0.01, + ) + + def test_expansion_probability_copy_tree(self): + + tree = cas.tl.compute_expansion_probabilities( + self.tree, min_clade_size=2, min_depth=1, copy=True + ) + + expected_probabilities = { + "0": 1.0, + "1": 0.3, + "2": 0.8, + "3": 0.047, + "4": 1.0, + "5": 1.0, + "6": 1.0, + "7": 0.5, + "8": 0.6, + "9": 0.6, + "10": 1.0, + "11": 1.0, + "12": 1.0, + "13": 1.0, + "14": 1.0, + "15": 1.0, + "16": 0.6, + "17": 1.0, + "18": 1.0, + } + + for node in self.tree.depth_first_traverse_nodes(postorder=False): + expected_copy = expected_probabilities[node] + + self.assertAlmostEqual( + expected_copy, + tree.get_attribute(node, "expansion_probability"), + delta=0.01, + ) + + self.assertRaises( + CassiopeiaTreeError, + self.tree.get_attribute, + node, + "expansion_probability", + ) + if __name__ == "__main__": unittest.main() From fb9ad08f7bda8c355ae5de678ac55a6c054f040e Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Mon, 25 Oct 2021 15:37:37 
-0700 Subject: [PATCH 5/8] updated return statement in docs --- cassiopeia/tools/topology.py | 5 +++-- docs/api/tools.rst | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cassiopeia/tools/topology.py b/cassiopeia/tools/topology.py index 4afe6c83..7eff806f 100644 --- a/cassiopeia/tools/topology.py +++ b/cassiopeia/tools/topology.py @@ -42,7 +42,8 @@ def compute_expansion_probabilities( copy: Return copy. Returns: - None. Adds attributes to the tree. + If copy is set to False, returns None and adds the attributes to the + tree in place. Else, returns a new CassiopeiaTree. """ tree = tree.copy() if copy else tree @@ -71,7 +72,7 @@ def compute_expansion_probabilities( depth = tree.get_attribute(c, "depth") if depth < min_depth: continue - + b = len(tree.leaves_in_subtree(c)) p = np.sum( [ diff --git a/docs/api/tools.rst b/docs/api/tools.rst index 3347e481..beacae41 100644 --- a/docs/api/tools.rst +++ b/docs/api/tools.rst @@ -15,6 +15,7 @@ Small-Parsimony .. autosummary:: :toctree: reference/ + tl.compute_expansion_probabilities tl.compute_morans_i tl.fitch_count tl.fitch_hartigan From c83a84889a0194fe63a30443979b09280d93e654 Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Tue, 26 Oct 2021 10:04:06 -0700 Subject: [PATCH 6/8] updated docs; improved computation --- cassiopeia/tools/topology.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/cassiopeia/tools/topology.py b/cassiopeia/tools/topology.py index 7eff806f..527da1ef 100644 --- a/cassiopeia/tools/topology.py +++ b/cassiopeia/tools/topology.py @@ -25,11 +25,13 @@ def compute_expansion_probabilities( Mathematical treatment of the coalescent probability is described in Griffiths and Tavare, Stochastic Models (1998). - The probability corresponds to the probability that a given subclade - contains the number of cells as would be expected under a simple coalescent - model.
Often, if the probability is less than some threshold (e.g., 0.05), - this might indicate that there exists some subclade under this node that - to which this expansion probability can be attributed. + The probability corresponds to the probability that, under a simple neutral + coalescent model, a given subclade contains the observed number of cells; in + other words, a one-sided p-value. Often, if the probability is less than + some threshold (e.g., 0.05), this might indicate that there exists some + subclade under this node that to which this expansion probability can be + attributed (i.e. the null hypothesis that the subclade is undergoing + neutral drift can be rejected). This function will add an attribute "expansion_probability" to the tree, and return None unless :param:`copy` is set to True. @@ -49,15 +51,14 @@ def compute_expansion_probabilities( tree = tree.copy() if copy else tree # instantiate attributes + _depths = {} for node in tree.depth_first_traverse_nodes(postorder=False): tree.set_attribute(node, "expansion_probability", 1.0) if tree.is_root(node): - tree.set_attribute(node, "depth", 0) + _depths[node] = 0 else: - tree.set_attribute( - node, "depth", tree.get_attribute(tree.parent(node), "depth") + 1 - ) + _depths[node] = (_depths[tree.parent(node)] + 1) for node in tree.depth_first_traverse_nodes(postorder=False): @@ -69,17 +70,17 @@ def compute_expansion_probabilities( if len(tree.leaves_in_subtree(c)) < min_clade_size: continue - depth = tree.get_attribute(c, "depth") + depth = _depths[c] if depth < min_depth: continue b = len(tree.leaves_in_subtree(c)) - p = np.sum( - [ - simple_coalescent_probability(n, b2, k) - for b2 in range(b, n - k + 2) - ] - ) + + # this value below is a simplification of the quantity: + # sum[simple_coalescent_probability(n, b2, k) for \ + # b2 in range(b, n - k + 2)] + p = nCk(n-b, k-1) / nCk(n-1, k-1) + tree.set_attribute(c, "expansion_probability", p) return tree if copy else None From 
8a13c7fc91f47076d77ede18809223e3df424647 Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Tue, 26 Oct 2021 10:41:32 -0700 Subject: [PATCH 7/8] renamed expansion_probability to expansion_pvalue --- cassiopeia/tools/__init__.py | 2 +- cassiopeia/tools/topology.py | 22 +++++++++++----------- docs/api/tools.rst | 2 +- test/tools_tests/topology_test.py | 18 +++++++++--------- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/cassiopeia/tools/__init__.py b/cassiopeia/tools/__init__.py index a099d222..4c39eab6 100644 --- a/cassiopeia/tools/__init__.py +++ b/cassiopeia/tools/__init__.py @@ -3,4 +3,4 @@ from .autocorrelation import compute_morans_i from .branch_length_estimator import IIDExponentialBayesian, IIDExponentialMLE from .small_parsimony import fitch_count, fitch_hartigan, score_small_parsimony -from .topology import compute_expansion_probabilities \ No newline at end of file +from .topology import compute_expansion_pvalues \ No newline at end of file diff --git a/cassiopeia/tools/topology.py b/cassiopeia/tools/topology.py index 527da1ef..823f9530 100644 --- a/cassiopeia/tools/topology.py +++ b/cassiopeia/tools/topology.py @@ -12,28 +12,28 @@ from cassiopeia.mixins import CassiopeiaError -def compute_expansion_probabilities( +def compute_expansion_pvalues( tree: CassiopeiaTree, min_clade_size: int = 10, min_depth: int = 1, copy: bool = False, ) -> Union[CassiopeiaTree, None]: - """Call expansion probabilities on a tree. + """Call expansion pvalues on a tree. Uses the methodology described in Yang, Jones et al, BioRxiv (2021) to assess the expansion probability of a given subclade of a phylogeny. Mathematical treatment of the coalescent probability is described in Griffiths and Tavare, Stochastic Models (1998). - The probability corresponds to the probability that, under a simple neutral - coalescent model, a given subclade contains the observed number of cells; in - other words, a one-sided p-value. 
Often, if the probability is less than - some threshold (e.g., 0.05), this might indicate that there exists some - subclade under this node that to which this expansion probability can be - attributed (i.e. the null hypothesis that the subclade is undergoing + The probability computed corresponds to the probability that, under a simple + neutral coalescent model, a given subclade contains the observed number of + cells; in other words, a one-sided p-value. Often, if the probability is + less than some threshold (e.g., 0.05), this might indicate that there exists + some subclade under this node to which this expansion probability can + be attributed (i.e. the null hypothesis that the subclade is undergoing neutral drift can be rejected). - This function will add an attribute "expansion_probability" to the tree, and + This function will add an attribute "expansion_pvalue" to the tree, and return None unless :param:`copy` is set to True. Args: @@ -53,7 +53,7 @@ def compute_expansion_probabilities( # instantiate attributes _depths = {} for node in tree.depth_first_traverse_nodes(postorder=False): - tree.set_attribute(node, "expansion_probability", 1.0) + tree.set_attribute(node, "expansion_pvalue", 1.0) if tree.is_root(node): _depths[node] = 0 @@ -81,7 +81,7 @@ def compute_expansion_probabilities( # b2 in range(b, n - k + 2)] p = nCk(n-b, k-1) / nCk(n-1, k-1) - tree.set_attribute(c, "expansion_probability", p) + tree.set_attribute(c, "expansion_pvalue", p) return tree if copy else None diff --git a/docs/api/tools.rst b/docs/api/tools.rst index beacae41..7352008d 100644 --- a/docs/api/tools.rst +++ b/docs/api/tools.rst @@ -15,7 +15,7 @@ Small-Parsimony .. 
autosummary:: :toctree: reference/ - tl.compute_expansion_probabilities + tl.compute_expansion_pvalues tl.compute_morans_i tl.fitch_count tl.fitch_hartigan diff --git a/test/tools_tests/topology_test.py b/test/tools_tests/topology_test.py index 763250e2..ad3a808a 100644 --- a/test/tools_tests/topology_test.py +++ b/test/tools_tests/topology_test.py @@ -61,13 +61,13 @@ def test_simple_coalescent_probability(self): def test_expansion_probability(self): # make sure attributes are instantiated correctly - cas.tl.compute_expansion_probabilities(self.tree, min_clade_size=20) + cas.tl.compute_expansion_pvalues(self.tree, min_clade_size=20) for node in self.tree.depth_first_traverse_nodes(postorder=False): self.assertEqual( - 1.0, self.tree.get_attribute(node, "expansion_probability") + 1.0, self.tree.get_attribute(node, "expansion_pvalue") ) - cas.tl.compute_expansion_probabilities(self.tree, min_clade_size=2) + cas.tl.compute_expansion_pvalues(self.tree, min_clade_size=2) expected_probabilities = { "0": 1.0, "1": 0.3, @@ -94,13 +94,13 @@ def test_expansion_probability(self): expected = expected_probabilities[node] self.assertAlmostEqual( expected, - self.tree.get_attribute(node, "expansion_probability"), + self.tree.get_attribute(node, "expansion_pvalue"), delta=0.01, ) def test_expansion_probability_variable_depths(self): - cas.tl.compute_expansion_probabilities(self.tree, min_clade_size=2, min_depth=3) + cas.tl.compute_expansion_pvalues(self.tree, min_clade_size=2, min_depth=3) expected_probabilities = { "0": 1.0, "1": 1.0, @@ -127,13 +127,13 @@ def test_expansion_probability_variable_depths(self): expected = expected_probabilities[node] self.assertAlmostEqual( expected, - self.tree.get_attribute(node, "expansion_probability"), + self.tree.get_attribute(node, "expansion_pvalue"), delta=0.01, ) def test_expansion_probability_copy_tree(self): - tree = cas.tl.compute_expansion_probabilities( + tree = cas.tl.compute_expansion_pvalues( self.tree, min_clade_size=2, 
min_depth=1, copy=True ) @@ -164,7 +164,7 @@ def test_expansion_probability_copy_tree(self): self.assertAlmostEqual( expected_copy, - tree.get_attribute(node, "expansion_probability"), + tree.get_attribute(node, "expansion_pvalue"), delta=0.01, ) @@ -172,7 +172,7 @@ def test_expansion_probability_copy_tree(self): CassiopeiaTreeError, self.tree.get_attribute, node, - "expansion_probability", + "expansion_pvalue", ) From 6e8b6501fc7d3f0a30eb7f0ef2ec8a33eaf74681 Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Tue, 26 Oct 2021 10:48:41 -0700 Subject: [PATCH 8/8] updated docs in expansion pvalues with computational complexity --- cassiopeia/tools/topology.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cassiopeia/tools/topology.py b/cassiopeia/tools/topology.py index 823f9530..0549559e 100644 --- a/cassiopeia/tools/topology.py +++ b/cassiopeia/tools/topology.py @@ -36,6 +36,10 @@ def compute_expansion_pvalues( This function will add an attribute "expansion_pvalue" to the tree, and return None unless :param:`copy` is set to True. + On a typical balanced tree, this function will perform in O(n log n) time, + but can be up to O(n^3) on highly unbalanced trees. A future endeavor may + be to implement the function in O(n) time. + Args: tree: CassiopeiaTree min_clade_size: Minimum number of leaves in a subtree to be considered.