From e29aa8c4a0e44613058ff64d374ffb34fcccf2e7 Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Tue, 26 Oct 2021 15:55:19 -0700 Subject: [PATCH 01/12] added inter cluster distance calculatoin --- cassiopeia/data/utilities.py | 106 ++++++++++++++++++++++++++++++++--- cassiopeia/tools/topology.py | 2 +- 2 files changed, 99 insertions(+), 9 deletions(-) diff --git a/cassiopeia/data/utilities.py b/cassiopeia/data/utilities.py index ba503d94..71d9ca53 100755 --- a/cassiopeia/data/utilities.py +++ b/cassiopeia/data/utilities.py @@ -14,6 +14,7 @@ from cassiopeia.data import CassiopeiaTree from cassiopeia.mixins import CassiopeiaTreeWarning, is_ambiguous_state +from cassiopeia.mixins.errors import CassiopeiaError from cassiopeia.preprocess import utilities as preprocessing_utilities @@ -309,10 +310,8 @@ def sample_bootstrap_allele_tables( allele_table ) - lineage_profile = ( - preprocessing_utilities.convert_alleletable_to_lineage_profile( - allele_table, cut_sites - ) + lineage_profile = preprocessing_utilities.convert_alleletable_to_lineage_profile( + allele_table, cut_sites ) intbcs = allele_table["intBC"].unique() @@ -375,6 +374,7 @@ def resolve_most_abundant(state: Tuple[int, ...]) -> int: [state for state, count in most_common if count == most_common[0][1]] ) + def compute_phylogenetic_weight_matrix( tree: CassiopeiaTree, inverse: bool = False, @@ -399,10 +399,10 @@ def compute_phylogenetic_weight_matrix( An NxN phylogenetic weight matrix """ N = tree.n_cell - W = pd.DataFrame(np.zeros((N , N)), index=tree.leaves, columns=tree.leaves) + W = pd.DataFrame(np.zeros((N, N)), index=tree.leaves, columns=tree.leaves) for leaf1 in tree.leaves: - + distances = tree.get_distances(leaf1, leaves_only=True) for leaf2, _d in distances.items(): @@ -412,5 +412,95 @@ def compute_phylogenetic_weight_matrix( W.loc[leaf1, leaf2] = W.loc[leaf2, leaf1] = _d np.fill_diagonal(W.values, 0) - - return W \ No newline at end of file + + return W + + +def compute_inter_cluster_distances( + 
tree: CassiopeiaTree, + meta_item: Optional[str] = None, + meta_data: Optional[pd.DataFrame] = None, + dissimilarity_map: Optional[pd.DataFrame] = None, + distance_function: Callable = net_relatedness_index, + **kwargs, +) -> pd.DataFrame: + """Computes mean distance between clusters. + + Compute the mean distance between categories in a categorical variable. By + default, the phylogenetic weight matrix will be computed and used for this + distance calculation, but a user can optionally provide a dissimilarity + map instead. + + Args: + tree: CassiopeiaTree + meta_item: Column in the cell meta data of the tree. If `meta_data` is + specified, this is ignored. + meta_data: Meta data to use for this calculation. This argument takes + priority over meta_item. + dissimilarity_map: Dissimilarity map to use for distances. If this is + specified, the phylogenetic weight matrix is not computed. + distance_function: Function that computes the distance between two + groups of cells from the dissimilarity map and the two groups' + indices. Defaults to `net_relatedness_index`. + **kwargs: Arguments to pass to the distance function. + + Returns: + A K x K distance matrix. 
+ """ + meta_data = tree.cell_meta[meta_item] if (meta_data is None) else meta_data + + # ensure that the meta data is categorical + if not pd.api.types.is_string_dtype(meta_data): + raise CassiopeiaError("Meta data must be categorical or a string.") + + D = ( + compute_phylogenetic_weight_matrix(tree) + if (dissimilarity_map is None) + else dissimilarity_map + ) + + unique_states = meta_data.unique() + K = len(unique_states) + inter_cluster_distances = pd.DataFrame( + np.zeros((K, K)), index=unique_states, columns=unique_states + ) + + for state1 in unique_states: + indices_1 = np.where(np.array(meta_data) == state1)[0] + for state2 in unique_states: + indices_2 = np.where(np.array(meta_data) == state2)[0] + + distance = distance_function( + D.values, indices_1, indices_2, **kwargs + ) + inter_cluster_distances.loc[state1, state2] = distance + + return inter_cluster_distances + + +@numba.jit(nopython=True) +def net_relatedness_index( + dissimilarity_map: np.array, indices_1: np.array, indices_2: np.array +) -> float: + """Computes the net relatedness index between indices. + + Using the dissimilarity map specified and the indices of samples, compute + the net relatedness index, defined as: + + sum(distances over i,j in indices_1,indices_2) / (|indices_1| x |indices_2|) + + Args: + dissimilarity_map: Dissimilarity map between all samples. + indices_1: Indices corresponding to the first group. + indices_2: Indices corresponding to the second group. 
+ + Returns: + The Net Relatedness Index (NRI) + """ + + nri = 0 + for i in indices_1: + for j in indices_2: + nri += dissimilarity_map[i, j] + + return nri / (len(indices_1) * len(indices_2)) diff --git a/cassiopeia/tools/topology.py b/cassiopeia/tools/topology.py index 0549559e..3bb98094 100644 --- a/cassiopeia/tools/topology.py +++ b/cassiopeia/tools/topology.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from cassiopeia.data import CassiopeiaTree +from cassiopeia.data import CassiopeiaTree, compute_phylogenetic_weight_matrix from cassiopeia.mixins import CassiopeiaError From a6173ab545ecdacb6a9d887cd19f646c96a7970e Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Tue, 26 Oct 2021 16:33:33 -0700 Subject: [PATCH 02/12] tested inter cluster distance function --- cassiopeia/data/__init__.py | 2 + cassiopeia/data/utilities.py | 57 +++++++------ test/data_tests/data_utilities_test.py | 111 +++++++++++++++++++++++++ 3 files changed, 141 insertions(+), 29 deletions(-) diff --git a/cassiopeia/data/__init__.py b/cassiopeia/data/__init__.py index c48b9e2b..026cfd77 100644 --- a/cassiopeia/data/__init__.py +++ b/cassiopeia/data/__init__.py @@ -3,8 +3,10 @@ from .CassiopeiaTree import CassiopeiaTree from .utilities import ( compute_dissimilarity_map, + compute_inter_cluster_distances, compute_phylogenetic_weight_matrix, get_lca_characters, + net_relatedness_index, sample_bootstrap_allele_tables, sample_bootstrap_character_matrices, to_newick, diff --git a/cassiopeia/data/utilities.py b/cassiopeia/data/utilities.py index 71d9ca53..40c66c7f 100755 --- a/cassiopeia/data/utilities.py +++ b/cassiopeia/data/utilities.py @@ -414,6 +414,33 @@ def compute_phylogenetic_weight_matrix( np.fill_diagonal(W.values, 0) return W + +@numba.jit(nopython=True) +def net_relatedness_index( + dissimilarity_map: np.array, indices_1: np.array, indices_2: np.array +) -> float: + """Computes the net relatedness index between indices. 
+ + Using the dissimilarity map specified and the indices of samples, compute + the net relatedness index, defined as: + + sum(distances over i,j in indices_1,indices_2) / (|indices_1| x |indices_2|) + + Args: + dissimilarity_map: Dissimilarity map between all samples. + indices_1: Indices corresponding to the first group. + indices_2: Indices corresponding to the second group. + + Returns: + The Net Relatedness Index (NRI) + """ + + nri = 0 + for i in indices_1: + for j in indices_2: + nri += dissimilarity_map[i, j] + + return nri / (len(indices_1) * len(indices_2)) def compute_inter_cluster_distances( @@ -475,32 +502,4 @@ def compute_inter_cluster_distances( ) inter_cluster_distances.loc[state1, state2] = distance - return inter_cluster_distances - - -@numba.jit(nopython=True) -def net_relatedness_index( - dissimilarity_map: np.array, indices_1: np.array, indices_2: np.array -) -> float: - """Computes the net relatedness index between indices. - - Using the dissimilarity map specified and the indices of samples, compute - the net relatedness index, defined as: - - sum(distances over i,j in indices_1,indices_2) / (|indices_1| x |indices_2|) - - Args: - dissimilarity_map: Dissimilarity map between all samples. - indices_1: Indices corresponding to the first group. - indices_2: Indices corresponding to the second group. 
- - Returns: - The Net Relatedness Index (NRI) - """ - - nri = 0 - for i in indices_1: - for j in indices_2: - nri += dissimilarity_map[i, j] - - return nri / (len(indices_1) * len(indices_2)) + return inter_cluster_distances \ No newline at end of file diff --git a/test/data_tests/data_utilities_test.py b/test/data_tests/data_utilities_test.py index 8a9d4402..e846549d 100755 --- a/test/data_tests/data_utilities_test.py +++ b/test/data_tests/data_utilities_test.py @@ -11,6 +11,7 @@ from cassiopeia.data import CassiopeiaTree from cassiopeia.data import utilities as data_utilities +from cassiopeia.mixins.errors import CassiopeiaError from cassiopeia.preprocess import utilities as preprocessing_utilities @@ -405,6 +406,116 @@ def test_phylogenetic_weights_matrix_inverse_fn(self): pd.testing.assert_frame_equal(weight_matrix, expected_weight_matrix) + def test_net_relatedness_index(self): + + distances = np.array( + [[0, 1, 2, 4], [1, 0, 3, 6], [2, 3, 0, 5], [4, 6, 5, 0]] + ) + indices_1 = np.array([0, 1]) + indices_2 = np.array([2, 3]) + + nri = data_utilities.net_relatedness_index( + distances, indices_1, indices_2 + ) + self.assertAlmostEqual(15.0 / 4.0, nri, delta=0.0001) + + def test_inter_cluster_distance_basic(self): + + tree = nx.DiGraph() + tree.add_nodes_from(["A", "B", "C", "D", "E", "F"]) + tree.add_edge("F", "A", length=0.1) + tree.add_edge("F", "B", length=0.2) + tree.add_edge("F", "E", length=0.5) + tree.add_edge("E", "C", length=0.3) + tree.add_edge("E", "D", length=0.4) + + meta_data = pd.DataFrame.from_dict( + { + "A": ["TypeA", 10], + "B": ["TypeA", 5], + "C": ["TypeB", 3], + "D": ["TypeB", 22], + }, + orient="index", + columns=["CellType", "nUMI"], + ) + + tree = CassiopeiaTree(tree=tree, cell_meta=meta_data) + + inter_cluster_distances = data_utilities.compute_inter_cluster_distances( + tree, meta_item="CellType" + ) + + expected_distances = pd.DataFrame.from_dict( + {"TypeA": [0.15, 1.0], "TypeB": [1.0, 0.35]}, + orient="index", + columns=["TypeA", 
"TypeB"], + ) + + pd.testing.assert_frame_equal( + expected_distances, inter_cluster_distances + ) + + self.assertRaises( + CassiopeiaError, + data_utilities.compute_inter_cluster_distances, + tree, + "nUMI", + ) + + def test_inter_cluster_distance_custom_input(self): + + tree = nx.DiGraph() + tree.add_nodes_from(["A", "B", "C", "D", "E", "F"]) + tree.add_edge("F", "A", length=0.1) + tree.add_edge("F", "B", length=0.2) + tree.add_edge("F", "E", length=0.5) + tree.add_edge("E", "C", length=0.3) + tree.add_edge("E", "D", length=0.4) + + meta_data = pd.DataFrame.from_dict( + { + "A": ["TypeA", 10], + "B": ["TypeA", 5], + "C": ["TypeB", 3], + "D": ["TypeB", 22], + }, + orient="index", + columns=["CellType", "nUMI"], + ) + + weight_matrix = pd.DataFrame.from_dict( + { + "A": [0.0, 0.5, 1.2, 0.4], + "B": [0.5, 0.0, 3.0, 1.1], + "C": [1.2, 3.0, 0.0, 0.8], + "D": [0.4, 1.1, 0.8, 0.0], + }, + orient="index", + columns=["A", "B", "C", "D"], + ) + + tree = CassiopeiaTree(tree=tree) + + inter_cluster_distances = data_utilities.compute_inter_cluster_distances( + tree, + meta_data=meta_data["CellType"], + dissimilarity_map=weight_matrix, + ) + + expected_distances = pd.DataFrame.from_dict( + {"TypeA": [0.25, 1.425], "TypeB": [1.425, 0.4]}, + orient="index", + columns=["TypeA", "TypeB"], + ) + + pd.testing.assert_frame_equal( + expected_distances, + inter_cluster_distances, + check_exact=False, + atol=0.001, + ) + if __name__ == "__main__": unittest.main() From ff38f3931bfcf6779ee645c745d6eac9734ab910 Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Tue, 26 Oct 2021 17:01:26 -0700 Subject: [PATCH 03/12] wrote logic for evo copuling alg --- cassiopeia/tools/coupling.py | 127 +++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 cassiopeia/tools/coupling.py diff --git a/cassiopeia/tools/coupling.py b/cassiopeia/tools/coupling.py new file mode 100644 index 00000000..dc988a03 --- /dev/null +++ b/cassiopeia/tools/coupling.py @@ -0,0 +1,127 @@ +""" 
+File storing functionality for computing coupling statistics between meta +variables on a tree. +""" +from typing import Optional + +from collections import defaultdict +import numpy as np +import pandas as pd +from tqdm import tqdm + +from cassiopeia.data import CassiopeiaTree +from cassiopeia.data import utilities as data_utilities + + +def compute_evolutionary_coupling( + tree: CassiopeiaTree, + meta_variable: str, + minimum_proportion: float = 0.05, + number_of_shuffles: int = 500, + random_state: Optional[np.random.RandomState] = None, + dissimilarity_map: Optional[pd.DataFrame] = None, + cluster_comparison_function: Optional[ + Callable + ] = data_utilitiesnet_relatedness_index, + **comparison_kwargs, +) -> pd.DataFrame: + """Computes Evolutionary Coupling of categorical variables. + + Using the methodology described in Yang, Jones et al, BioRxiv (2021), this + function will compute the "evolutionary coupling" statistic between values + that a categorical variable can take on with the tree. For example, this + categorical variable can be a "cell type", and this function will compute + the evolutionary couplings between all types of cell types. This indicates + how closely related these cell types are to one another. + + Briefly, this statistic is the Z-normalized mean distance between categories + in the specified categorical variable. + + The computational complexity of this function is O(n^2 log n + Bk^2) for a + tree with n leaves, a variable with k categories, and B random shuffles. + + Args: + tree: CassiopeiaTree + meta_variable: Column in `tree.cell_meta` that stores a categorical + variable with K categories. + minimum_proportion: Minimum proportion of cells that a category needs + to appear in to be considered. + number_of_shuffles: Number of times to shuffle the data to compute the + empirical Z score. + random_state: Numpy random state to parameterize the shuffling. + dissimilarity_map: A precomputed dissimilarity map between all leaves. 
+ cluster_comparison_function: A function for comparing the mean distance + between groups. By default, this is the Net Relatedness Index. + **comparison_kwargs: Extra arguments to pass to the cluster comparison + function. + + Returns: + A K x K evolutionary coupling dataframe. + """ + + W = ( + data_utilities.compute_phylogenetic_weight_matrix(tree) + if (dissimilarity_map is None) + else dissimilarity_map + ) + + meta_data = tree.cell_meta[meta_variable] + + # subset meta data by minimum proportion + if minimum_proportion > 0: + filter_threshold = int(len(tree.leaves) * minimum_proportion) + category_frequencies = meta_data.value_counts() + passing_categories = category_frequencies[ + category_frequencies > filter_threshold + ].index.values + meta_data = meta_data[meta_data[meta_variable].isin(passing_categories)] + W = W.loc[meta_data.index.values, meta_data.index.values] + + # compute inter-cluster distances + inter_cluster_distances = data_utilities.compute_inter_cluster_distances( + tree, + meta_data=meta_data, + dissimilarity_map=W, + distance_function=cluster_comparison_function, + **comparison_kwargs, + ) + + # compute background for Z-scoring + background = defaultdict(list) + for _ in tqdm(number_of_shuffles, desc="Creating empirical background"): + + permuted_assignments = meta_data.copy() + if random_state: + permuted_assignments.index = random_state.permutation( + meta_data.index.values + ) + else: + permuted_assignments.index = np.random.permutation( + meta_data.index.values + ) + + background_distances = data_utilities.compute_inter_cluster_distances( + tree, + permuted_assignments, + dissimilarity_map=W, + distance_function=cluster_comparison_function, + **kwargs, + ) + + for s1 in background_distances.index: + for s2 in background_distances.columns: + background[(s1, s2)].append(background_distances.loc[s1, s2]) + + Z_scores = inter_cluster_distances.copy() + for s1 in Z_scores.index: + for s2 in Z_scores.columns: + mean = np.mean(background[(s1, 
s2)]) + sd = np.std(background[(s1, s2)]) + + Z_scores.loc[s1, s2] = ( + inter_cluster_distances.loc[s1, s2] - mean + ) / sd + + Z_scores.fillna(0, inplace=True) + + return Z_scores From 4c7e187c978a192bf3332ae90ed95c36b6e382a9 Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Tue, 26 Oct 2021 17:05:51 -0700 Subject: [PATCH 04/12] updated small bug in arguments --- cassiopeia/tools/coupling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cassiopeia/tools/coupling.py b/cassiopeia/tools/coupling.py index dc988a03..6a76e5b3 100644 --- a/cassiopeia/tools/coupling.py +++ b/cassiopeia/tools/coupling.py @@ -105,7 +105,7 @@ def compute_evolutionary_coupling( permuted_assignments, dissimilarity_map=W, distance_function=cluster_comparison_function, - **kwargs, + **comparison_kwargs, ) for s1 in background_distances.index: From b3099bc66b0532ece53aec1bde419728c874c142 Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Wed, 27 Oct 2021 14:40:21 -0700 Subject: [PATCH 05/12] added integration tests for evolutionary coupling --- cassiopeia/data/utilities.py | 3 +++ cassiopeia/tools/__init__.py | 1 + cassiopeia/tools/coupling.py | 19 ++++++++----------- test/data_tests/data_utilities_test.py | 1 - 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cassiopeia/data/utilities.py b/cassiopeia/data/utilities.py index 40c66c7f..6c9ed0af 100755 --- a/cassiopeia/data/utilities.py +++ b/cassiopeia/data/utilities.py @@ -492,6 +492,9 @@ def compute_inter_cluster_distances( np.zeros((K, K)), index=unique_states, columns=unique_states ) + # align distance matrix and meta_data + D = D.loc[meta_data.index.values, meta_data.index.values] + for state1 in unique_states: indices_1 = np.where(np.array(meta_data) == state1)[0] for state2 in unique_states: diff --git a/cassiopeia/tools/__init__.py b/cassiopeia/tools/__init__.py index 4c39eab6..e549cbc1 100644 --- a/cassiopeia/tools/__init__.py +++ b/cassiopeia/tools/__init__.py @@ -2,5 +2,6 @@ from .autocorrelation 
import compute_morans_i from .branch_length_estimator import IIDExponentialBayesian, IIDExponentialMLE +from .coupling import compute_evolutionary_coupling from .small_parsimony import fitch_count, fitch_hartigan, score_small_parsimony from .topology import compute_expansion_pvalues \ No newline at end of file diff --git a/cassiopeia/tools/coupling.py b/cassiopeia/tools/coupling.py index 6a76e5b3..66fba3b6 100644 --- a/cassiopeia/tools/coupling.py +++ b/cassiopeia/tools/coupling.py @@ -2,7 +2,7 @@ File storing functionality for computing coupling statistics between meta variables on a tree. """ -from typing import Optional +from typing import Callable, Optional from collections import defaultdict import numpy as np @@ -22,7 +22,7 @@ def compute_evolutionary_coupling( dissimilarity_map: Optional[pd.DataFrame] = None, cluster_comparison_function: Optional[ Callable - ] = data_utilitiesnet_relatedness_index, + ] = data_utilities.net_relatedness_index, **comparison_kwargs, ) -> pd.DataFrame: """Computes Evolutionary Coupling of categorical variables. @@ -35,7 +35,9 @@ def compute_evolutionary_coupling( how closely related these cell types are to one another. Briefly, this statistic is the Z-normalized mean distance between categories - in the specified categorical variable. + in the specified categorical variable. Note that empirical nulls that have a + standard deviation of 0 lead to NaNs in the resulting evolutionary coupling + matrix. The computational complexity of this function is O(n^2 log n + Bk^2) for a tree with n leaves, a variable with k categories, and B random shuffles. 
@@ -74,7 +76,7 @@ def compute_evolutionary_coupling( passing_categories = category_frequencies[ category_frequencies > filter_threshold ].index.values - meta_data = meta_data[meta_data[meta_variable].isin(passing_categories)] + meta_data = meta_data[meta_data.isin(passing_categories)] W = W.loc[meta_data.index.values, meta_data.index.values] # compute inter-cluster distances @@ -88,8 +90,7 @@ def compute_evolutionary_coupling( # compute background for Z-scoring background = defaultdict(list) - for _ in tqdm(number_of_shuffles, desc="Creating empirical background"): - + for _ in tqdm(range(number_of_shuffles), desc="Creating empirical background"): permuted_assignments = meta_data.copy() if random_state: permuted_assignments.index = random_state.permutation( @@ -99,15 +100,13 @@ def compute_evolutionary_coupling( permuted_assignments.index = np.random.permutation( meta_data.index.values ) - background_distances = data_utilities.compute_inter_cluster_distances( tree, - permuted_assignments, + meta_data=permuted_assignments, dissimilarity_map=W, distance_function=cluster_comparison_function, **comparison_kwargs, ) - for s1 in background_distances.index: for s2 in background_distances.columns: background[(s1, s2)].append(background_distances.loc[s1, s2]) @@ -122,6 +121,4 @@ def compute_evolutionary_coupling( inter_cluster_distances.loc[s1, s2] - mean ) / sd - Z_scores.fillna(0, inplace=True) - return Z_scores diff --git a/test/data_tests/data_utilities_test.py b/test/data_tests/data_utilities_test.py index e846549d..b05dde58 100755 --- a/test/data_tests/data_utilities_test.py +++ b/test/data_tests/data_utilities_test.py @@ -3,7 +3,6 @@ """ import unittest -from typing import Dict, Optional import networkx as nx import numpy as np From 922057cfa684a1b77e111932cac477bd823f95e9 Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Wed, 27 Oct 2021 14:47:08 -0700 Subject: [PATCH 06/12] updated docs --- docs/api/data.rst | 1 + docs/api/tools.rst | 27 
++++++++++++++++++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/docs/api/data.rst b/docs/api/data.rst index 6d03519a..91625736 100644 --- a/docs/api/data.rst +++ b/docs/api/data.rst @@ -23,6 +23,7 @@ We also have several utilities that are useful for working with various data rel :toctree: reference/ data.compute_dissimilarity_map + data.compute_inter_cluster_distances data.compute_phylogenetic_weight_matrix data.get_lca_characters data.sample_bootstrap_allele_tables diff --git a/docs/api/tools.rst b/docs/api/tools.rst index 7352008d..afbade9b 100644 --- a/docs/api/tools.rst +++ b/docs/api/tools.rst @@ -14,9 +14,30 @@ Small-Parsimony .. autosummary:: :toctree: reference/ - - tl.compute_expansion_pvalues - tl.compute_morans_i + tl.fitch_count tl.fitch_hartigan tl.score_small_parsimony + +Autocorrelation +~~~~~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: reference/ + + tl.compute_morans_i + +Coupling +~~~~~~~~~~~ + +.. autosummary:: + :toctree: reference/ + + tl.compute_evolutionary_coupling + +Topology +~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: reference/ + + tl.compute_expansion_pvalues \ No newline at end of file From 4f98f254bca63d627f87915222d4bce4540d332d Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Wed, 27 Oct 2021 14:56:56 -0700 Subject: [PATCH 07/12] updated documentation --- cassiopeia/data/utilities.py | 4 +++- cassiopeia/tools/coupling.py | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/cassiopeia/data/utilities.py b/cassiopeia/data/utilities.py index 6c9ed0af..e3ffb5b8 100755 --- a/cassiopeia/data/utilities.py +++ b/cassiopeia/data/utilities.py @@ -442,7 +442,6 @@ def net_relatedness_index( return nri / (len(indices_1) * len(indices_2)) - def compute_inter_cluster_distances( tree: CassiopeiaTree, meta_item: Optional[str] = None, @@ -458,6 +457,9 @@ def compute_inter_cluster_distances( distance calculation, but a user can optionally provide a dissimilarity map instead. 
+ This function performs the computation in O(K^2)*O(distance_function) time + for a variable with K categories. + Args: tree: CassiopeiaTree meta_item: Column in the cell meta data of the tree. If `meta_data` is diff --git a/cassiopeia/tools/coupling.py b/cassiopeia/tools/coupling.py index 66fba3b6..0726acfc 100644 --- a/cassiopeia/tools/coupling.py +++ b/cassiopeia/tools/coupling.py @@ -39,8 +39,9 @@ def compute_evolutionary_coupling( standard deviation of 0 lead to NaNs in the resulting evolutionary coupling matrix. - The computational complexity of this function is O(n^2 log n + Bk^2) for a - tree with n leaves, a variable with k categories, and B random shuffles. + The computational complexity of this function is + O(n^2 log n + (B+1)(K^2 * O(distance_function)) for a tree with n leaves, a + variable with K categories, and B random shuffles. Args: tree: CassiopeiaTree From 1cce1ea8474fb261667389972255c5918ed53196 Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Wed, 27 Oct 2021 15:02:01 -0700 Subject: [PATCH 08/12] added tests for couplings --- test/tools_tests/coupling_test.py | 213 ++++++++++++++++++++++++++++++ 1 file changed, 213 insertions(+) create mode 100644 test/tools_tests/coupling_test.py diff --git a/test/tools_tests/coupling_test.py b/test/tools_tests/coupling_test.py new file mode 100644 index 00000000..b59b39f7 --- /dev/null +++ b/test/tools_tests/coupling_test.py @@ -0,0 +1,213 @@ +""" +Tests for the coupling estimators implemented in cassiopeia/tools/coupling.py +""" +import unittest + +import networkx as nx +import numpy as np +import pandas as pd + +import cassiopeia as cas +from cassiopeia.data import CassiopeiaTree +from cassiopeia.data import utilities as data_utilities +from cassiopeia.mixins import CassiopeiaError + + +class TestDataUtilities(unittest.TestCase): + def setUp(self) -> None: + + tree = nx.DiGraph() + tree.add_edges_from( + [ + ("A", "B"), + ("A", "C"), + ("B", "D"), + ("B", "E"), + ("B", "F"), + ("E", "G"), + ("E", "H"), 
+ ("C", "I"), + ("C", "J"), + ] + ) + + meta_data = pd.DataFrame.from_dict( + { + "D": ["TypeB", 10], + "F": ["TypeA", 5], + "G": ["TypeA", 3], + "H": ["TypeB", 22], + "I": ["TypeC", 2], + "J": ["TypeC", 11], + }, + orient="index", + columns=["CellType", "nUMI"], + ) + + self.tree = CassiopeiaTree(tree=tree, cell_meta=meta_data) + + def test_evolutionary_coupling_basic(self): + + random_state = np.random.RandomState(1231234) + + evolutionary_coupling = cas.tl.compute_evolutionary_coupling( + self.tree, + meta_variable="CellType", + random_state=random_state, + minimum_proportion=0.0, + number_of_shuffles=10, + ) + + inter_cluster_distances = data_utilities.compute_inter_cluster_distances( + self.tree, meta_item="CellType" + ) + + # background computed with random seed set above and 10 shuffles + # (state1, state2): (mean, sd) + expected_summary_stats = { + ("TypeA", "TypeA"): (1.7, 0.6000000000000001), + ("TypeA", "TypeB"): (3.55, 0.4716990566028302), + ("TypeA", "TypeC"): (3.55, 0.4716990566028302), + ("TypeB", "TypeA"): (3.55, 0.4716990566028302), + ("TypeB", "TypeB"): (2.0, 0.5), + ("TypeB", "TypeC"): (3.65, 0.45), + ("TypeC", "TypeA"): (3.55, 0.4716990566028302), + ("TypeC", "TypeB"): (3.65, 0.45), + ("TypeC", "TypeC"): (1.8, 0.5567764362830022), + } + + expected_coupling = inter_cluster_distances.copy() + for s1 in expected_coupling.index: + for s2 in expected_coupling.columns: + mean = expected_summary_stats[(s1, s2)][0] + sd = expected_summary_stats[(s1, s2)][1] + + expected_coupling.loc[s1, s2] = ( + inter_cluster_distances.loc[s1, s2] - mean + ) / sd + + pd.testing.assert_frame_equal( + expected_coupling, evolutionary_coupling, atol=0.001 + ) + + # make sure errors are raised for numerical data + self.assertRaises( + CassiopeiaError, + cas.tl.compute_evolutionary_coupling, + self.tree, + "nUMI", + ) + + def test_evolutionary_coupling_custom_dissimilarity_map(self): + + weight_matrix = pd.DataFrame.from_dict( + { + "D": [0.0, 0.5, 1.2, 0.4, 0.5, 0.6], + 
"F": [0.5, 0.0, 3.0, 1.1, 3.0, 0.1], + "G": [1.2, 3.0, 0.0, 0.8, 0.2, 0.8], + "H": [0.4, 1.1, 0.8, 0.0, 2.0, 2.1], + "I": [0.5, 3.0, 0.2, 2.0, 0.0, 0.1], + "J": [0.6, 0.1, 1.8, 2.1, 0.1, 0.0], + }, + orient="index", + columns=["D", "F", "G", "H", "I", "J"], + ) + + random_state = np.random.RandomState(1231234) + + evolutionary_coupling = cas.tl.compute_evolutionary_coupling( + self.tree, + meta_variable="CellType", + random_state=random_state, + minimum_proportion=0.0, + number_of_shuffles=10, + dissimilarity_map=weight_matrix, + ) + + inter_cluster_distances = data_utilities.compute_inter_cluster_distances( + self.tree, meta_item="CellType", dissimilarity_map=weight_matrix + ) + + # background computed with random seed set above and 10 shuffles + # (state1, state2): (mean, sd) + expected_summary_stats = { + ("TypeB", "TypeB"): (0.695, 0.5456418239101545), + ("TypeB", "TypeA"): (1.0000000000000002, 0.281291663580704), + ("TypeB", "TypeC"): (1.0925, 0.44763964301656745), + ("TypeA", "TypeB"): (1.0000000000000002, 0.3148412298286232), + ("TypeA", "TypeA"): (0.63, 0.4550824101193101), + ("TypeA", "TypeC"): (1.2349999999999999, 0.391503512117069), + ("TypeC", "TypeB"): (1.0675000000000001, 0.4493119740225047), + ("TypeC", "TypeA"): (1.26, 0.41791147387933725), + ("TypeC", "TypeC"): (0.4699999999999999, 0.41424630354415953), + } + + expected_coupling = inter_cluster_distances.copy() + for s1 in expected_coupling.index: + for s2 in expected_coupling.columns: + mean = expected_summary_stats[(s1, s2)][0] + sd = expected_summary_stats[(s1, s2)][1] + + expected_coupling.loc[s1, s2] = ( + inter_cluster_distances.loc[s1, s2] - mean + ) / sd + + pd.testing.assert_frame_equal( + expected_coupling, evolutionary_coupling, atol=0.001 + ) + + def test_evolutionary_coupling_minimum_proportion(self): + + self.tree.cell_meta.loc["J", "CellType"] = "TypeD" + + random_state = np.random.RandomState(1231234) + + evolutionary_coupling = cas.tl.compute_evolutionary_coupling( + self.tree, + 
meta_variable="CellType", + random_state=random_state, + minimum_proportion=1 / 6, # This will drop types C and D + number_of_shuffles=10, + ) + + # make sure TypeC and TypeD are not in the evolutionary coupling matrix + expected_types = ["TypeA", "TypeB"] + self.assertCountEqual(expected_types, evolutionary_coupling.index) + self.assertCountEqual(expected_types, evolutionary_coupling.columns) + + # make sure couplings are correct + inter_cluster_distances = data_utilities.compute_inter_cluster_distances( + self.tree, meta_item="CellType" + ) + + inter_cluster_distances = inter_cluster_distances.loc[ + expected_types, expected_types + ] + + expected_summary_stats = { + ("TypeB", "TypeB"): (1.4, 0.19999999999999998), + ("TypeB", "TypeA"): (2.6, 0.19999999999999998), + ("TypeA", "TypeB"): (2.6, 0.19999999999999998), + ("TypeA", "TypeA"): (1.4, 0.19999999999999998), + } + + expected_coupling = inter_cluster_distances.copy() + for s1 in expected_coupling.index: + for s2 in expected_coupling.columns: + mean = expected_summary_stats[(s1, s2)][0] + sd = expected_summary_stats[(s1, s2)][1] + + expected_coupling.loc[s1, s2] = ( + inter_cluster_distances.loc[s1, s2] - mean + ) / sd + + evolutionary_coupling = evolutionary_coupling.loc[ + expected_types, expected_types + ] + pd.testing.assert_frame_equal( + expected_coupling, evolutionary_coupling, atol=0.001 + ) + + +if __name__ == "__main__": + unittest.main() From 38b90e463e6860f9bdf21c9ec7af5001832a7fd6 Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Wed, 27 Oct 2021 15:15:40 -0700 Subject: [PATCH 09/12] don't cover net relatedness index because it gets jit compiled --- cassiopeia/data/utilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cassiopeia/data/utilities.py b/cassiopeia/data/utilities.py index e3ffb5b8..0469f174 100755 --- a/cassiopeia/data/utilities.py +++ b/cassiopeia/data/utilities.py @@ -418,7 +418,7 @@ def compute_phylogenetic_weight_matrix( @numba.jit(nopython=True) def 
net_relatedness_index( dissimilarity_map: np.array, indices_1: np.array, indices_2: np.array -) -> float: +) -> float: # pragma: no cover """Computes the net relatedness index between indices. Using the dissimilarity map specified and the indices of samples, compute From 812b1fb843a23f1f11b7945e76d412f6ef337799 Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Mon, 1 Nov 2021 15:09:53 -0700 Subject: [PATCH 10/12] updated docs with branch length inference --- docs/api/critique.rst | 16 ------- docs/api/data.rst | 31 ------------- docs/api/index.rst | 19 -------- docs/api/plotting.rst | 18 -------- docs/api/preprocess.rst | 43 ------------------- .../cassiopeia.data.CassiopeiaTree.rst | 13 ------ ...ia.data.sample_bootstrap_allele_tables.rst | 6 --- ...ta.sample_bootstrap_character_matrices.rst | 6 --- .../reference/cassiopeia.data.to_newick.rst | 6 --- .../cassiopeia.pl.upload_and_export_itol.rst | 6 --- .../cassiopeia.pp.align_sequences.rst | 6 --- .../reference/cassiopeia.pp.call_alleles.rst | 6 --- .../cassiopeia.pp.call_lineage_groups.rst | 6 --- .../reference/cassiopeia.pp.collapse_umis.rst | 6 --- ...peia.pp.compute_empirical_indel_priors.rst | 6 --- ...onvert_alleletable_to_character_matrix.rst | 6 --- ...convert_alleletable_to_lineage_profile.rst | 6 --- ...rt_lineage_profile_to_character_matrix.rst | 6 --- .../cassiopeia.pp.error_correct_umis.rst | 6 --- .../reference/cassiopeia.pp.filter_cells.rst | 6 --- .../cassiopeia.pp.filter_molecule_table.rst | 6 --- .../reference/cassiopeia.pp.filter_umis.rst | 6 --- .../cassiopeia.pp.resolve_umi_sequence.rst | 6 --- .../cassiopeia.solver.HybridSolver.rst | 13 ------ .../reference/cassiopeia.solver.ILPSolver.rst | 13 ------ .../cassiopeia.solver.MaxCutGreedySolver.rst | 13 ------ .../cassiopeia.solver.MaxCutSolver.rst | 13 ------ ...assiopeia.solver.NeighborJoiningSolver.rst | 13 ------ .../cassiopeia.solver.PercolationSolver.rst | 13 ------ ...eia.solver.SharedMutationJoiningSolver.rst | 13 ------ 
...cassiopeia.solver.SpectralGreedySolver.rst | 13 ------ .../cassiopeia.solver.SpectralSolver.rst | 13 ------ .../cassiopeia.solver.UPGMASolver.rst | 13 ------ .../cassiopeia.solver.VanillaGreedySolver.rst | 13 ------ docs/api/simulator.rst | 41 ------------------ docs/api/solver.rst | 41 ------------------ docs/api/tools.rst | 43 ------------------- 37 files changed, 510 deletions(-) delete mode 100644 docs/api/critique.rst delete mode 100644 docs/api/data.rst delete mode 100644 docs/api/index.rst delete mode 100644 docs/api/plotting.rst delete mode 100644 docs/api/preprocess.rst delete mode 100644 docs/api/reference/cassiopeia.data.CassiopeiaTree.rst delete mode 100644 docs/api/reference/cassiopeia.data.sample_bootstrap_allele_tables.rst delete mode 100644 docs/api/reference/cassiopeia.data.sample_bootstrap_character_matrices.rst delete mode 100644 docs/api/reference/cassiopeia.data.to_newick.rst delete mode 100644 docs/api/reference/cassiopeia.pl.upload_and_export_itol.rst delete mode 100644 docs/api/reference/cassiopeia.pp.align_sequences.rst delete mode 100644 docs/api/reference/cassiopeia.pp.call_alleles.rst delete mode 100644 docs/api/reference/cassiopeia.pp.call_lineage_groups.rst delete mode 100644 docs/api/reference/cassiopeia.pp.collapse_umis.rst delete mode 100644 docs/api/reference/cassiopeia.pp.compute_empirical_indel_priors.rst delete mode 100644 docs/api/reference/cassiopeia.pp.convert_alleletable_to_character_matrix.rst delete mode 100644 docs/api/reference/cassiopeia.pp.convert_alleletable_to_lineage_profile.rst delete mode 100644 docs/api/reference/cassiopeia.pp.convert_lineage_profile_to_character_matrix.rst delete mode 100644 docs/api/reference/cassiopeia.pp.error_correct_umis.rst delete mode 100644 docs/api/reference/cassiopeia.pp.filter_cells.rst delete mode 100644 docs/api/reference/cassiopeia.pp.filter_molecule_table.rst delete mode 100644 docs/api/reference/cassiopeia.pp.filter_umis.rst delete mode 100644 
docs/api/reference/cassiopeia.pp.resolve_umi_sequence.rst delete mode 100644 docs/api/reference/cassiopeia.solver.HybridSolver.rst delete mode 100644 docs/api/reference/cassiopeia.solver.ILPSolver.rst delete mode 100644 docs/api/reference/cassiopeia.solver.MaxCutGreedySolver.rst delete mode 100644 docs/api/reference/cassiopeia.solver.MaxCutSolver.rst delete mode 100644 docs/api/reference/cassiopeia.solver.NeighborJoiningSolver.rst delete mode 100644 docs/api/reference/cassiopeia.solver.PercolationSolver.rst delete mode 100644 docs/api/reference/cassiopeia.solver.SharedMutationJoiningSolver.rst delete mode 100644 docs/api/reference/cassiopeia.solver.SpectralGreedySolver.rst delete mode 100644 docs/api/reference/cassiopeia.solver.SpectralSolver.rst delete mode 100644 docs/api/reference/cassiopeia.solver.UPGMASolver.rst delete mode 100644 docs/api/reference/cassiopeia.solver.VanillaGreedySolver.rst delete mode 100644 docs/api/simulator.rst delete mode 100644 docs/api/solver.rst delete mode 100644 docs/api/tools.rst diff --git a/docs/api/critique.rst b/docs/api/critique.rst deleted file mode 100644 index 46694706..00000000 --- a/docs/api/critique.rst +++ /dev/null @@ -1,16 +0,0 @@ -=========== -Critique -=========== -.. module:: cassiopeia.critique -.. currentmodule:: cassiopeia - -Critique -~~~~~~~~~~~~~~~~~~~ - -We support functionality for comparing trees to one another, for example when benchmarking new algorithms. - -.. autosummary:: - :toctree: reference/ - - critique.robinson_foulds - critique.triplets_correct \ No newline at end of file diff --git a/docs/api/data.rst b/docs/api/data.rst deleted file mode 100644 index 91625736..00000000 --- a/docs/api/data.rst +++ /dev/null @@ -1,31 +0,0 @@ -=========== -Data -=========== -.. module:: cassiopeia.data -.. currentmodule:: cassiopeia - -CassiopeiaTrees -~~~~~~~~~~~~~~~~~~~ - -The main data structure that Cassiopeia uses for all tree-based analyses is the CassiopeiaTree: - -.. 
autosummary:: - :toctree: reference/ - - data.CassiopeiaTree - -Utilities -~~~~~~~~~~~~~~~~~~~ - -We also have several utilities that are useful for working with various data related to phylogenetics: - -.. autosummary:: - :toctree: reference/ - - data.compute_dissimilarity_map - data.compute_inter_cluster_distances - data.compute_phylogenetic_weight_matrix - data.get_lca_characters - data.sample_bootstrap_allele_tables - data.sample_bootstrap_character_matrices - data.to_newick \ No newline at end of file diff --git a/docs/api/index.rst b/docs/api/index.rst deleted file mode 100644 index 77cc9c95..00000000 --- a/docs/api/index.rst +++ /dev/null @@ -1,19 +0,0 @@ -=== -API -=== - - -Import Cassiopeia as:: - - import cassiopeia as cas - -.. toctree:: - :maxdepth: 1 - - preprocess - data - critique - solver - simulator - plotting - tools \ No newline at end of file diff --git a/docs/api/plotting.rst b/docs/api/plotting.rst deleted file mode 100644 index c738dba3..00000000 --- a/docs/api/plotting.rst +++ /dev/null @@ -1,18 +0,0 @@ -========== -Plotting -========== - -.. module:: cassiopeia.pl -.. currentmodule:: cassiopeia - -Plotting -~~~~~~~~~~~~~~~~~~~ - -Currently, our plotting functionality is linked to the rich iTOL framework: - -.. autosummary:: - :toctree: reference/ - - pl.upload_and_export_itol - - \ No newline at end of file diff --git a/docs/api/preprocess.rst b/docs/api/preprocess.rst deleted file mode 100644 index f79eb921..00000000 --- a/docs/api/preprocess.rst +++ /dev/null @@ -1,43 +0,0 @@ -=========== -Preprocess -=========== -.. module:: cassiopeia.pp -.. currentmodule:: cassiopeia - -Data Preprocessing -~~~~~~~~~~~~~~~~~~~ - -We have several functions that are part of our pipeline for processing sequencing data from single-cell lineage tracing technologies: - -.. 
autosummary:: - :toctree: reference/ - - pp.align_sequences - pp.call_alleles - pp.call_lineage_groups - pp.collapse_umis - pp.convert_fastqs_to_unmapped_bam - pp.error_correct_cellbcs_to_whitelist - pp.error_correct_intbcs_to_whitelist - pp.error_correct_umis - pp.filter_bam - pp.filter_molecule_table - pp.filter_cells - pp.filter_umis - pp.resolve_umi_sequence - - - - -Data Utilities -~~~~~~~~~~~~~~~~~~~ - -We also have several functions that are useful for converting between data formats for downstream analyses: - -.. autosummary:: - :toctree: reference/ - - pp.compute_empirical_indel_priors - pp.convert_alleletable_to_character_matrix - pp.convert_alleletable_to_lineage_profile - pp.convert_lineage_profile_to_character_matrix \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.data.CassiopeiaTree.rst b/docs/api/reference/cassiopeia.data.CassiopeiaTree.rst deleted file mode 100644 index d0d90838..00000000 --- a/docs/api/reference/cassiopeia.data.CassiopeiaTree.rst +++ /dev/null @@ -1,13 +0,0 @@ -cassiopeia.data.CassiopeiaTree -============================== - -.. currentmodule:: cassiopeia.data - -.. autoclass:: CassiopeiaTree - :members: - :undoc-members: - - .. rubric:: Methods - - .. autoautosummary:: CassiopeiaTree - :methods: \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.data.sample_bootstrap_allele_tables.rst b/docs/api/reference/cassiopeia.data.sample_bootstrap_allele_tables.rst deleted file mode 100644 index 1639e437..00000000 --- a/docs/api/reference/cassiopeia.data.sample_bootstrap_allele_tables.rst +++ /dev/null @@ -1,6 +0,0 @@ -cassiopeia.data.sample\_bootstrap\_allele\_tables -================================================= - -.. currentmodule:: cassiopeia.data - -.. 
autofunction:: sample_bootstrap_allele_tables \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.data.sample_bootstrap_character_matrices.rst b/docs/api/reference/cassiopeia.data.sample_bootstrap_character_matrices.rst deleted file mode 100644 index 45ba28e1..00000000 --- a/docs/api/reference/cassiopeia.data.sample_bootstrap_character_matrices.rst +++ /dev/null @@ -1,6 +0,0 @@ -cassiopeia.data.sample\_bootstrap\_character\_matrices -====================================================== - -.. currentmodule:: cassiopeia.data - -.. autofunction:: sample_bootstrap_character_matrices \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.data.to_newick.rst b/docs/api/reference/cassiopeia.data.to_newick.rst deleted file mode 100644 index 6b0ff7e3..00000000 --- a/docs/api/reference/cassiopeia.data.to_newick.rst +++ /dev/null @@ -1,6 +0,0 @@ -cassiopeia.data.to\_newick -========================== - -.. currentmodule:: cassiopeia.data - -.. autofunction:: to_newick \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.pl.upload_and_export_itol.rst b/docs/api/reference/cassiopeia.pl.upload_and_export_itol.rst deleted file mode 100644 index a8e70af8..00000000 --- a/docs/api/reference/cassiopeia.pl.upload_and_export_itol.rst +++ /dev/null @@ -1,6 +0,0 @@ -cassiopeia.pl.upload\_and\_export\_itol -======================================= - -.. currentmodule:: cassiopeia.pl - -.. autofunction:: upload_and_export_itol \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.pp.align_sequences.rst b/docs/api/reference/cassiopeia.pp.align_sequences.rst deleted file mode 100644 index 11927b42..00000000 --- a/docs/api/reference/cassiopeia.pp.align_sequences.rst +++ /dev/null @@ -1,6 +0,0 @@ -cassiopeia.pp.align\_sequences -============================== - -.. currentmodule:: cassiopeia.pp - -.. 
autofunction:: align_sequences \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.pp.call_alleles.rst b/docs/api/reference/cassiopeia.pp.call_alleles.rst deleted file mode 100644 index 9bea85ee..00000000 --- a/docs/api/reference/cassiopeia.pp.call_alleles.rst +++ /dev/null @@ -1,6 +0,0 @@ -cassiopeia.pp.call\_alleles -=========================== - -.. currentmodule:: cassiopeia.pp - -.. autofunction:: call_alleles \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.pp.call_lineage_groups.rst b/docs/api/reference/cassiopeia.pp.call_lineage_groups.rst deleted file mode 100644 index 8df5914d..00000000 --- a/docs/api/reference/cassiopeia.pp.call_lineage_groups.rst +++ /dev/null @@ -1,6 +0,0 @@ -cassiopeia.pp.call\_lineage\_groups -=================================== - -.. currentmodule:: cassiopeia.pp - -.. autofunction:: call_lineage_groups \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.pp.collapse_umis.rst b/docs/api/reference/cassiopeia.pp.collapse_umis.rst deleted file mode 100644 index 020d2ee7..00000000 --- a/docs/api/reference/cassiopeia.pp.collapse_umis.rst +++ /dev/null @@ -1,6 +0,0 @@ -cassiopeia.pp.collapse\_umis -============================ - -.. currentmodule:: cassiopeia.pp - -.. autofunction:: collapse_umis \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.pp.compute_empirical_indel_priors.rst b/docs/api/reference/cassiopeia.pp.compute_empirical_indel_priors.rst deleted file mode 100644 index 1b0f4e5c..00000000 --- a/docs/api/reference/cassiopeia.pp.compute_empirical_indel_priors.rst +++ /dev/null @@ -1,6 +0,0 @@ -cassiopeia.pp.compute\_empirical\_indel\_priors -=============================================== - -.. currentmodule:: cassiopeia.pp - -.. 
autofunction:: compute_empirical_indel_priors \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.pp.convert_alleletable_to_character_matrix.rst b/docs/api/reference/cassiopeia.pp.convert_alleletable_to_character_matrix.rst deleted file mode 100644 index 5e11c83b..00000000 --- a/docs/api/reference/cassiopeia.pp.convert_alleletable_to_character_matrix.rst +++ /dev/null @@ -1,6 +0,0 @@ -cassiopeia.pp.convert\_alleletable\_to\_character\_matrix -========================================================= - -.. currentmodule:: cassiopeia.pp - -.. autofunction:: convert_alleletable_to_character_matrix \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.pp.convert_alleletable_to_lineage_profile.rst b/docs/api/reference/cassiopeia.pp.convert_alleletable_to_lineage_profile.rst deleted file mode 100644 index 370d0832..00000000 --- a/docs/api/reference/cassiopeia.pp.convert_alleletable_to_lineage_profile.rst +++ /dev/null @@ -1,6 +0,0 @@ -cassiopeia.pp.convert\_alleletable\_to\_lineage\_profile -======================================================== - -.. currentmodule:: cassiopeia.pp - -.. autofunction:: convert_alleletable_to_lineage_profile \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.pp.convert_lineage_profile_to_character_matrix.rst b/docs/api/reference/cassiopeia.pp.convert_lineage_profile_to_character_matrix.rst deleted file mode 100644 index 893a4f42..00000000 --- a/docs/api/reference/cassiopeia.pp.convert_lineage_profile_to_character_matrix.rst +++ /dev/null @@ -1,6 +0,0 @@ -cassiopeia.pp.convert\_lineage\_profile\_to\_character\_matrix -============================================================== - -.. currentmodule:: cassiopeia.pp - -.. 
autofunction:: convert_lineage_profile_to_character_matrix \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.pp.error_correct_umis.rst b/docs/api/reference/cassiopeia.pp.error_correct_umis.rst deleted file mode 100644 index 95101d0d..00000000 --- a/docs/api/reference/cassiopeia.pp.error_correct_umis.rst +++ /dev/null @@ -1,6 +0,0 @@ -cassiopeia.pp.error\_correct\_umis -================================== - -.. currentmodule:: cassiopeia.pp - -.. autofunction:: error_correct_umis \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.pp.filter_cells.rst b/docs/api/reference/cassiopeia.pp.filter_cells.rst deleted file mode 100644 index db1c5704..00000000 --- a/docs/api/reference/cassiopeia.pp.filter_cells.rst +++ /dev/null @@ -1,6 +0,0 @@ -cassiopeia.pp.filter\_cells -=========================== - -.. currentmodule:: cassiopeia.pp - -.. autofunction:: filter_cells \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.pp.filter_molecule_table.rst b/docs/api/reference/cassiopeia.pp.filter_molecule_table.rst deleted file mode 100644 index d062d345..00000000 --- a/docs/api/reference/cassiopeia.pp.filter_molecule_table.rst +++ /dev/null @@ -1,6 +0,0 @@ -cassiopeia.pp.filter\_molecule\_table -===================================== - -.. currentmodule:: cassiopeia.pp - -.. autofunction:: filter_molecule_table \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.pp.filter_umis.rst b/docs/api/reference/cassiopeia.pp.filter_umis.rst deleted file mode 100644 index 962eb79c..00000000 --- a/docs/api/reference/cassiopeia.pp.filter_umis.rst +++ /dev/null @@ -1,6 +0,0 @@ -cassiopeia.pp.filter\_umis -========================== - -.. currentmodule:: cassiopeia.pp - -.. 
autofunction:: filter_umis \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.pp.resolve_umi_sequence.rst b/docs/api/reference/cassiopeia.pp.resolve_umi_sequence.rst deleted file mode 100644 index a1a1ddc6..00000000 --- a/docs/api/reference/cassiopeia.pp.resolve_umi_sequence.rst +++ /dev/null @@ -1,6 +0,0 @@ -cassiopeia.pp.resolve\_umi\_sequence -==================================== - -.. currentmodule:: cassiopeia.pp - -.. autofunction:: resolve_umi_sequence \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.solver.HybridSolver.rst b/docs/api/reference/cassiopeia.solver.HybridSolver.rst deleted file mode 100644 index 9646d98f..00000000 --- a/docs/api/reference/cassiopeia.solver.HybridSolver.rst +++ /dev/null @@ -1,13 +0,0 @@ -cassiopeia.solver.HybridSolver -============================== - -.. currentmodule:: cassiopeia.solver - -.. autoclass:: HybridSolver - :members: - :undoc-members: - - .. rubric:: Methods - - .. autoautosummary:: HybridSolver - :methods: \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.solver.ILPSolver.rst b/docs/api/reference/cassiopeia.solver.ILPSolver.rst deleted file mode 100644 index eebcd985..00000000 --- a/docs/api/reference/cassiopeia.solver.ILPSolver.rst +++ /dev/null @@ -1,13 +0,0 @@ -cassiopeia.solver.ILPSolver -=========================== - -.. currentmodule:: cassiopeia.solver - -.. autoclass:: ILPSolver - :members: - :undoc-members: - - .. rubric:: Methods - - .. autoautosummary:: ILPSolver - :methods: \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.solver.MaxCutGreedySolver.rst b/docs/api/reference/cassiopeia.solver.MaxCutGreedySolver.rst deleted file mode 100644 index 726bb6a1..00000000 --- a/docs/api/reference/cassiopeia.solver.MaxCutGreedySolver.rst +++ /dev/null @@ -1,13 +0,0 @@ -cassiopeia.solver.MaxCutGreedySolver -==================================== - -.. currentmodule:: cassiopeia.solver - -.. 
autoclass:: MaxCutGreedySolver - :members: - :undoc-members: - - .. rubric:: Methods - - .. autoautosummary:: MaxCutGreedySolver - :methods: \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.solver.MaxCutSolver.rst b/docs/api/reference/cassiopeia.solver.MaxCutSolver.rst deleted file mode 100644 index 218c45fe..00000000 --- a/docs/api/reference/cassiopeia.solver.MaxCutSolver.rst +++ /dev/null @@ -1,13 +0,0 @@ -cassiopeia.solver.MaxCutSolver -============================== - -.. currentmodule:: cassiopeia.solver - -.. autoclass:: MaxCutSolver - :members: - :undoc-members: - - .. rubric:: Methods - - .. autoautosummary:: MaxCutSolver - :methods: \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.solver.NeighborJoiningSolver.rst b/docs/api/reference/cassiopeia.solver.NeighborJoiningSolver.rst deleted file mode 100644 index 17c83adf..00000000 --- a/docs/api/reference/cassiopeia.solver.NeighborJoiningSolver.rst +++ /dev/null @@ -1,13 +0,0 @@ -cassiopeia.solver.NeighborJoiningSolver -======================================= - -.. currentmodule:: cassiopeia.solver - -.. autoclass:: NeighborJoiningSolver - :members: - :undoc-members: - - .. rubric:: Methods - - .. autoautosummary:: NeighborJoiningSolver - :methods: \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.solver.PercolationSolver.rst b/docs/api/reference/cassiopeia.solver.PercolationSolver.rst deleted file mode 100644 index 8ddcc3c7..00000000 --- a/docs/api/reference/cassiopeia.solver.PercolationSolver.rst +++ /dev/null @@ -1,13 +0,0 @@ -cassiopeia.solver.PercolationSolver -=================================== - -.. currentmodule:: cassiopeia.solver - -.. autoclass:: PercolationSolver - :members: - :undoc-members: - - .. rubric:: Methods - - .. 
autoautosummary:: PercolationSolver - :methods: \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.solver.SharedMutationJoiningSolver.rst b/docs/api/reference/cassiopeia.solver.SharedMutationJoiningSolver.rst deleted file mode 100644 index 2b8081c3..00000000 --- a/docs/api/reference/cassiopeia.solver.SharedMutationJoiningSolver.rst +++ /dev/null @@ -1,13 +0,0 @@ -cassiopeia.solver.SharedMutationJoiningSolver -============================================= - -.. currentmodule:: cassiopeia.solver - -.. autoclass:: SharedMutationJoiningSolver - :members: - :undoc-members: - - .. rubric:: Methods - - .. autoautosummary:: SharedMutationJoiningSolver - :methods: \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.solver.SpectralGreedySolver.rst b/docs/api/reference/cassiopeia.solver.SpectralGreedySolver.rst deleted file mode 100644 index f9f4cfb6..00000000 --- a/docs/api/reference/cassiopeia.solver.SpectralGreedySolver.rst +++ /dev/null @@ -1,13 +0,0 @@ -cassiopeia.solver.SpectralGreedySolver -====================================== - -.. currentmodule:: cassiopeia.solver - -.. autoclass:: SpectralGreedySolver - :members: - :undoc-members: - - .. rubric:: Methods - - .. autoautosummary:: SpectralGreedySolver - :methods: \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.solver.SpectralSolver.rst b/docs/api/reference/cassiopeia.solver.SpectralSolver.rst deleted file mode 100644 index 7d152bf9..00000000 --- a/docs/api/reference/cassiopeia.solver.SpectralSolver.rst +++ /dev/null @@ -1,13 +0,0 @@ -cassiopeia.solver.SpectralSolver -================================ - -.. currentmodule:: cassiopeia.solver - -.. autoclass:: SpectralSolver - :members: - :undoc-members: - - .. rubric:: Methods - - .. 
autoautosummary:: SpectralSolver - :methods: \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.solver.UPGMASolver.rst b/docs/api/reference/cassiopeia.solver.UPGMASolver.rst deleted file mode 100644 index 691ac333..00000000 --- a/docs/api/reference/cassiopeia.solver.UPGMASolver.rst +++ /dev/null @@ -1,13 +0,0 @@ -cassiopeia.solver.UPGMASolver -============================= - -.. currentmodule:: cassiopeia.solver - -.. autoclass:: UPGMASolver - :members: - :undoc-members: - - .. rubric:: Methods - - .. autoautosummary:: UPGMASolver - :methods: \ No newline at end of file diff --git a/docs/api/reference/cassiopeia.solver.VanillaGreedySolver.rst b/docs/api/reference/cassiopeia.solver.VanillaGreedySolver.rst deleted file mode 100644 index 7e761f6f..00000000 --- a/docs/api/reference/cassiopeia.solver.VanillaGreedySolver.rst +++ /dev/null @@ -1,13 +0,0 @@ -cassiopeia.solver.VanillaGreedySolver -===================================== - -.. currentmodule:: cassiopeia.solver - -.. autoclass:: VanillaGreedySolver - :members: - :undoc-members: - - .. rubric:: Methods - - .. autoautosummary:: VanillaGreedySolver - :methods: \ No newline at end of file diff --git a/docs/api/simulator.rst b/docs/api/simulator.rst deleted file mode 100644 index 45d16f96..00000000 --- a/docs/api/simulator.rst +++ /dev/null @@ -1,41 +0,0 @@ -=========== -Simulator -=========== -.. module:: cassiopeia.sim -.. currentmodule:: cassiopeia - - -Our simulators for cassiopeia are split up into those that simulate topologies and those that simulate data on top of the topologies. - -Tree Simulators -~~~~~~~~~~~~~~~~~~~ - -We have several frameworks available for simulating topologies: - -.. 
autosummary:: - :toctree: reference/ - - sim.BirthDeathFitnessSimulator - sim.CompleteBinarySimulator - sim.SimpleFitSubcloneSimulator - - -Data Simulators -~~~~~~~~~~~~~~~~~~~ - -These simulators are subclasses of the `DataSimulator` class and implement the `overlay_data` method which simulates data according to a given topology. - -.. autosummary:: - :toctree: reference/ - - sim.Cas9LineageTracingDataSimulator - -Leaf SubSamplers -~~~~~~~~~~~~~~~~~~~ -These are utilities for subsampling lineages for benchmarking purposes. For example, sampling a random proportion of leaves or grouping together cells into clades to model spatial data. - -.. autosummary:: - :toctree: reference/ - - sim.SupercellularSampler - sim.UniformLeafSubsampler \ No newline at end of file diff --git a/docs/api/solver.rst b/docs/api/solver.rst deleted file mode 100644 index 1215dac1..00000000 --- a/docs/api/solver.rst +++ /dev/null @@ -1,41 +0,0 @@ -=========== -Solver -=========== -.. module:: cassiopeia.solver -.. currentmodule:: cassiopeia - -CassiopeiaSolvers -~~~~~~~~~~~~~~~~~~~ - -We have several algorithms available for solving phylogenies: - -.. autosummary:: - :toctree: reference/ - - solver.HybridSolver - solver.ILPSolver - solver.MaxCutSolver - solver.MaxCutGreedySolver - solver.NeighborJoiningSolver - solver.PercolationSolver - solver.SharedMutationJoiningSolver - solver.SpectralSolver - solver.SpectralGreedySolver - solver.UPGMASolver - solver.VanillaGreedySolver - - -Dissimilarity Maps -~~~~~~~~~~~~~~~~~~~ - -For use in our distance-based solver and for comparing character states, we also have available several dissimilarity functions: - -.. 
autosummary:: - :toctree: reference/ - - solver.dissimilarity_functions.cluster_dissimilarity - solver.dissimilarity_functions.hamming_distance - solver.dissimilarity_functions.hamming_similarity_normalized_over_missing - solver.dissimilarity_functions.hamming_similarity_without_missing - solver.dissimilarity_functions.weighted_hamming_distance - solver.dissimilarity_functions.weighted_hamming_similarity \ No newline at end of file diff --git a/docs/api/tools.rst b/docs/api/tools.rst deleted file mode 100644 index afbade9b..00000000 --- a/docs/api/tools.rst +++ /dev/null @@ -1,43 +0,0 @@ -========== -Tools -========== - -.. module:: cassiopeia.tl -.. currentmodule:: cassiopeia - -This library stores code for post-reconstruction analysis of trees. We are -always in the process of developing new statistics and tools for helping us -interpret trees, and adding them to this library. - -Small-Parsimony -~~~~~~~~~~~~~~~~~~~ - -.. autosummary:: - :toctree: reference/ - - tl.fitch_count - tl.fitch_hartigan - tl.score_small_parsimony - -Autocorrelation -~~~~~~~~~~~~~~~~~~~ - -.. autosummary:: - :toctree: reference/ - - tl.compute_morans_i - -Coupling -~~~~~~~~~~~ - -.. autosummary:: - :toctree: reference/ - - tl.compute_evolutionary_coupling - -Topology -~~~~~~~~~~~~~~~~~~~ -.. 
autosummary:: - :toctree: reference/ - - tl.compute_expansion_pvalues \ No newline at end of file From 841303452d35cdc86b01c9592f0bc03e956b9df0 Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Mon, 1 Nov 2021 15:21:17 -0700 Subject: [PATCH 11/12] added back docs --- docs/api/critique.rst | 16 +++++++++++++++ docs/api/data.rst | 30 ++++++++++++++++++++++++++++ docs/api/index.rst | 19 ++++++++++++++++++ docs/api/plotting.rst | 16 +++++++++++++++ docs/api/preprocess.rst | 43 +++++++++++++++++++++++++++++++++++++++++ docs/api/simulator.rst | 41 +++++++++++++++++++++++++++++++++++++++ docs/api/solver.rst | 41 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 206 insertions(+) create mode 100644 docs/api/critique.rst create mode 100644 docs/api/data.rst create mode 100644 docs/api/index.rst create mode 100644 docs/api/plotting.rst create mode 100644 docs/api/preprocess.rst create mode 100644 docs/api/simulator.rst create mode 100644 docs/api/solver.rst diff --git a/docs/api/critique.rst b/docs/api/critique.rst new file mode 100644 index 00000000..46694706 --- /dev/null +++ b/docs/api/critique.rst @@ -0,0 +1,16 @@ +=========== +Critique +=========== +.. module:: cassiopeia.critique +.. currentmodule:: cassiopeia + +Critique +~~~~~~~~~~~~~~~~~~~ + +We support functionality for comparing trees to one another, for example when benchmarking new algorithms. + +.. autosummary:: + :toctree: reference/ + + critique.robinson_foulds + critique.triplets_correct \ No newline at end of file diff --git a/docs/api/data.rst b/docs/api/data.rst new file mode 100644 index 00000000..6d03519a --- /dev/null +++ b/docs/api/data.rst @@ -0,0 +1,30 @@ +=========== +Data +=========== +.. module:: cassiopeia.data +.. currentmodule:: cassiopeia + +CassiopeiaTrees +~~~~~~~~~~~~~~~~~~~ + +The main data structure that Cassiopeia uses for all tree-based analyses is the CassiopeiaTree: + +.. 
autosummary:: + :toctree: reference/ + + data.CassiopeiaTree + +Utilities +~~~~~~~~~~~~~~~~~~~ + +We also have several utilities that are useful for working with various data related to phylogenetics: + +.. autosummary:: + :toctree: reference/ + + data.compute_dissimilarity_map + data.compute_phylogenetic_weight_matrix + data.get_lca_characters + data.sample_bootstrap_allele_tables + data.sample_bootstrap_character_matrices + data.to_newick \ No newline at end of file diff --git a/docs/api/index.rst b/docs/api/index.rst new file mode 100644 index 00000000..77cc9c95 --- /dev/null +++ b/docs/api/index.rst @@ -0,0 +1,19 @@ +=== +API +=== + + +Import Cassiopeia as:: + + import cassiopeia as cas + +.. toctree:: + :maxdepth: 1 + + preprocess + data + critique + solver + simulator + plotting + tools \ No newline at end of file diff --git a/docs/api/plotting.rst b/docs/api/plotting.rst new file mode 100644 index 00000000..bc065672 --- /dev/null +++ b/docs/api/plotting.rst @@ -0,0 +1,16 @@ +========== +Plotting +========== + +.. module:: cassiopeia.pl +.. currentmodule:: cassiopeia + +Plotting +~~~~~~~~~~~~~~~~~~~ + +Currently, our plotting functionality is linked to the rich iTOL framework: + +.. autosummary:: + :toctree: reference/ + + pl.upload_and_export_itol \ No newline at end of file diff --git a/docs/api/preprocess.rst b/docs/api/preprocess.rst new file mode 100644 index 00000000..f79eb921 --- /dev/null +++ b/docs/api/preprocess.rst @@ -0,0 +1,43 @@ +=========== +Preprocess +=========== +.. module:: cassiopeia.pp +.. currentmodule:: cassiopeia + +Data Preprocessing +~~~~~~~~~~~~~~~~~~~ + +We have several functions that are part of our pipeline for processing sequencing data from single-cell lineage tracing technologies: + +.. 
autosummary:: + :toctree: reference/ + + pp.align_sequences + pp.call_alleles + pp.call_lineage_groups + pp.collapse_umis + pp.convert_fastqs_to_unmapped_bam + pp.error_correct_cellbcs_to_whitelist + pp.error_correct_intbcs_to_whitelist + pp.error_correct_umis + pp.filter_bam + pp.filter_molecule_table + pp.filter_cells + pp.filter_umis + pp.resolve_umi_sequence + + + + +Data Utilities +~~~~~~~~~~~~~~~~~~~ + +We also have several functions that are useful for converting between data formats for downstream analyses: + +.. autosummary:: + :toctree: reference/ + + pp.compute_empirical_indel_priors + pp.convert_alleletable_to_character_matrix + pp.convert_alleletable_to_lineage_profile + pp.convert_lineage_profile_to_character_matrix \ No newline at end of file diff --git a/docs/api/simulator.rst b/docs/api/simulator.rst new file mode 100644 index 00000000..45d16f96 --- /dev/null +++ b/docs/api/simulator.rst @@ -0,0 +1,41 @@ +=========== +Simulator +=========== +.. module:: cassiopeia.sim +.. currentmodule:: cassiopeia + + +Our simulators for cassiopeia are split up into those that simulate topologies and those that simulate data on top of the topologies. + +Tree Simulators +~~~~~~~~~~~~~~~~~~~ + +We have several frameworks available for simulating topologies: + +.. autosummary:: + :toctree: reference/ + + sim.BirthDeathFitnessSimulator + sim.CompleteBinarySimulator + sim.SimpleFitSubcloneSimulator + + +Data Simulators +~~~~~~~~~~~~~~~~~~~ + +These simulators are subclasses of the `DataSimulator` class and implement the `overlay_data` method which simulates data according to a given topology. + +.. autosummary:: + :toctree: reference/ + + sim.Cas9LineageTracingDataSimulator + +Leaf SubSamplers +~~~~~~~~~~~~~~~~~~~ +These are utilities for subsampling lineages for benchmarking purposes. For example, sampling a random proportion of leaves or grouping together cells into clades to model spatial data. + +.. 
autosummary:: + :toctree: reference/ + + sim.SupercellularSampler + sim.UniformLeafSubsampler \ No newline at end of file diff --git a/docs/api/solver.rst b/docs/api/solver.rst new file mode 100644 index 00000000..1215dac1 --- /dev/null +++ b/docs/api/solver.rst @@ -0,0 +1,41 @@ +=========== +Solver +=========== +.. module:: cassiopeia.solver +.. currentmodule:: cassiopeia + +CassiopeiaSolvers +~~~~~~~~~~~~~~~~~~~ + +We have several algorithms available for solving phylogenies: + +.. autosummary:: + :toctree: reference/ + + solver.HybridSolver + solver.ILPSolver + solver.MaxCutSolver + solver.MaxCutGreedySolver + solver.NeighborJoiningSolver + solver.PercolationSolver + solver.SharedMutationJoiningSolver + solver.SpectralSolver + solver.SpectralGreedySolver + solver.UPGMASolver + solver.VanillaGreedySolver + + +Dissimilarity Maps +~~~~~~~~~~~~~~~~~~~ + +For use in our distance-based solver and for comparing character states, we also have available several dissimilarity functions: + +.. autosummary:: + :toctree: reference/ + + solver.dissimilarity_functions.cluster_dissimilarity + solver.dissimilarity_functions.hamming_distance + solver.dissimilarity_functions.hamming_similarity_normalized_over_missing + solver.dissimilarity_functions.hamming_similarity_without_missing + solver.dissimilarity_functions.weighted_hamming_distance + solver.dissimilarity_functions.weighted_hamming_similarity \ No newline at end of file From 2709515beb9e9ade202fde2c1c7f4042542198b6 Mon Sep 17 00:00:00 2001 From: Matthew Jones Date: Wed, 10 Nov 2021 10:26:15 -0800 Subject: [PATCH 12/12] fixed up small clerical mistakes --- cassiopeia/data/utilities.py | 2 +- cassiopeia/tools/coupling.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cassiopeia/data/utilities.py b/cassiopeia/data/utilities.py index 73ebb466..77e4c127 100755 --- a/cassiopeia/data/utilities.py +++ b/cassiopeia/data/utilities.py @@ -417,7 +417,7 @@ def compute_phylogenetic_weight_matrix( 
@numba.jit(nopython=True) def net_relatedness_index( dissimilarity_map: np.array, indices_1: np.array, indices_2: np.array -) -> float: # pragma: no cover +) -> float: """Computes the net relatedness index between indices. Using the dissimilarity map specified and the indices of samples, compute diff --git a/cassiopeia/tools/coupling.py b/cassiopeia/tools/coupling.py index 0726acfc..2ba982c7 100644 --- a/cassiopeia/tools/coupling.py +++ b/cassiopeia/tools/coupling.py @@ -20,9 +20,7 @@ def compute_evolutionary_coupling( number_of_shuffles: int = 500, random_state: Optional[np.random.RandomState] = None, dissimilarity_map: Optional[pd.DataFrame] = None, - cluster_comparison_function: Optional[ - Callable - ] = data_utilities.net_relatedness_index, + cluster_comparison_function: Callable = data_utilities.net_relatedness_index, **comparison_kwargs, ) -> pd.DataFrame: """Computes Evolutionary Coupling of categorical variables. @@ -91,7 +89,9 @@ def compute_evolutionary_coupling( # compute background for Z-scoring background = defaultdict(list) - for _ in tqdm(range(number_of_shuffles), desc="Creating empirical background"): + for _ in tqdm( + range(number_of_shuffles), desc="Creating empirical background" + ): permuted_assignments = meta_data.copy() if random_state: permuted_assignments.index = random_state.permutation(