Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hotfix/cleanup #235

Merged
merged 5 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 0 additions & 61 deletions big_scape/network/families.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@
import sys
from typing import Callable, Optional
import warnings
import numpy as np
import networkx
import math
import logging

# from dependencies
Expand Down Expand Up @@ -105,24 +102,6 @@ def generate_families(
return regions_families


def get_cc_edge_weight_std(connected_component) -> float:
    """Compute the spread of the edge weights within a connected component

    Args:
        connected_component (list[tuple[int, int, float, float, float, float, str]]):
            edges that make up the connected component; the third item of each
            edge is taken as its weight

    Returns:
        float: standard deviation of the edge weights, rounded to two decimals
    """
    weights = []
    for edge in connected_component:
        weights.append(edge[2])

    # np.std uses its default settings here, i.e. the population standard deviation
    return round(np.std(weights), 2)


def get_cc_density(
connected_component: list[tuple[int, int, float, float, float, float, int]]
) -> float:
Expand All @@ -148,46 +127,6 @@ def get_cc_density(
return cc_density


def test_centrality(connected_component, node_fraction) -> tuple[bool, list[int]]:
    """Check whether a network falls apart once its most central nodes are removed

    Nodes are ranked by betweenness centrality (highest first) and the top
    fraction of them is dropped from the graph.

    Args:
        connected_component (list[tuple[int, int, float, float, float, float, str]]):
            connected component in the form of a list of edges
        node_fraction (float): fraction of the highest ranked nodes to remove

    Returns:
        tuple[bool, list[int]]: True if more than one connected component is left
        after the removal, plus all nodes sorted by descending betweenness
        centrality
    """
    graph = networkx.Graph()
    # only the two node ids and the third item (the weight) of each edge are used
    graph.add_weighted_edges_from(
        (edge[0], edge[1], edge[2]) for edge in connected_component
    )

    centrality_by_node = networkx.betweenness_centrality(graph)
    ranked_nodes = sorted(
        centrality_by_node, key=centrality_by_node.get, reverse=True
    )

    # round up so that any positive fraction removes at least one node
    cutoff = math.ceil(len(ranked_nodes) * node_fraction)
    graph.remove_nodes_from(ranked_nodes[:cutoff])

    network_breaks = networkx.number_connected_components(graph) > 1

    return network_breaks, ranked_nodes


def aff_sim_matrix(matrix, preference: Optional[float] = None):
"""Execute affinity propagation on a __similarity__ matrix

Expand Down
17 changes: 0 additions & 17 deletions big_scape/network/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,6 @@

# from dependencies
import numpy as np
import networkx as nx


def sim_matrix_from_graph(graph: nx.Graph, edge_property: str) -> np.ndarray:
    """Build a similarity matrix, as a numpy array, from the edges of a graph

    Args:
        graph (Graph): graph whose edges carry distance values
        edge_property (str): name of the edge attribute that is read as the distance

    Returns:
        ndarray: matrix of similarities, computed as 1 - distance
    """
    # node pairs without an edge are filled in with a distance of 1.0, so they
    # end up with a similarity of 0 after the conversion below
    distances = nx.to_numpy_array(graph, weight=edge_property, nonedge=1.0)
    return 1 - distances


def edge_list_to_adj_list(
Expand Down
6 changes: 3 additions & 3 deletions big_scape/utility/version.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""Module that contains helper functions specifically related to the bigscape version
"""

import toml

import tomllib
from importlib import metadata
from pathlib import Path

Expand All @@ -21,7 +20,8 @@ def get_bigscape_version() -> str:
pyproject_toml = Path(__file__).parent.parent.parent / "pyproject.toml"

if pyproject_toml.exists():
return toml.load(pyproject_toml)["project"]["version"]
with open(pyproject_toml, "rb") as fp:
return tomllib.load(fp)["project"]["version"]

# if not, we're probably running as a package. get the version of the currently
# installed big-scape package
Expand Down
1 change: 0 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ dependencies:
- biopython=1.81
- sortedcontainers=2.4.0
- fasttree=2.1.11
- networkx=3.1
- numpy=1.26.0
- pandas=2.1.1
- pyhmmer=0.10.14
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ dev = [

# type stubs (https://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports)
"types-psutil",
"networkx-stubs",
"data-science-types",
"types-tqdm",
"types-setuptools"
Expand Down
1 change: 1 addition & 0 deletions test/comparison/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Contains tests for distance calculation"""
1 change: 1 addition & 0 deletions test/hmm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Contains tests involving (py)HMMer analysis and processing"""
1 change: 1 addition & 0 deletions test/network/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Contains tests involving CC and family generation"""
26 changes: 0 additions & 26 deletions test/network/test_family.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,18 +121,6 @@ def test_aff_sim_matrix(self):

self.assertListEqual(expected_labels, actual_labels)

def test_get_cc_edge_weight_std(self):
    """Checks that the standard deviation of the edge weights of a connected
    component matches the expected value
    """
    edges = TestAffinityPropagation.gen_edge_list()

    self.assertEqual(0.12, bs_families.get_cc_edge_weight_std(edges))

def test_get_cc_density(self):
"""Tests whether the density of a connected component is correctly
calculated
Expand All @@ -146,17 +134,3 @@ def test_get_cc_density(self):
actual_density = bs_families.get_cc_density(adj_list)

self.assertEqual(expected_density, actual_density)

def test_test_centrality(self):
    """Checks that test_centrality reports a break for a network that splits
    once the nodes with the highest betweenness centrality are removed
    """
    edges = TestAffinityPropagation.gen_edge_list_alt()

    # the second item (nodes sorted by centrality) is not checked in this test
    network_breaks, _ = bs_families.test_centrality(edges, 0.3)

    self.assertEqual(True, network_breaks)
Loading