Skip to content

Commit

Permalink
Merge pull request #225 from kayjan/abstract-functions
Browse files Browse the repository at this point in the history
Abstract functions
  • Loading branch information
kayjan authored Apr 23, 2024
2 parents 33bb781 + c038692 commit 34b6cb9
Show file tree
Hide file tree
Showing 11 changed files with 214 additions and 64 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Changed:
- DAG Constructor: `list_to_dag` and `dict_to_dag` do not rely on `dataframe_to_dag` as pandas dataframe operations
are being phased out.
### Fixed:
- DAG Constructor: Handle cases where reserved keywords are used as attribute names upon creation and raise an error accordingly.

## [0.17.1] - 2024-04-23
### Fixed
Expand Down
5 changes: 1 addition & 4 deletions bigtree/binarytree/construct.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,7 @@ def list_to_binarytree(
node_list = [root_node]
for idx, num in enumerate(heapq_list):
if idx:
if idx % 2:
parent_idx = int((idx - 1) / 2)
else:
parent_idx = int((idx - 2) / 2)
parent_idx = int((idx + 1) / 2) - 1
node = node_type(num, parent=node_list[parent_idx]) # type: ignore
node_list.append(node)
return root_node
63 changes: 45 additions & 18 deletions bigtree/dag/construct.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from bigtree.utils.assertions import (
assert_dataframe_no_duplicate_attribute,
assert_dataframe_not_empty,
assert_dictionary_not_empty,
assert_key_not_in_dict_or_df,
assert_length_not_empty,
filter_attributes,
isnull,
Expand All @@ -21,7 +21,6 @@
__all__ = ["list_to_dag", "dict_to_dag", "dataframe_to_dag"]


@optional_dependencies_pandas
def list_to_dag(
relations: List[Tuple[str, str]],
node_type: Type[DAGNode] = DAGNode,
Expand All @@ -45,13 +44,26 @@ def list_to_dag(
"""
assert_length_not_empty(relations, "Input list", "relations")

relation_data = pd.DataFrame(relations, columns=["parent", "child"])
return dataframe_to_dag(
relation_data, child_col="child", parent_col="parent", node_type=node_type
)
node_dict: Dict[str, DAGNode] = dict()
parent_node = DAGNode()

for parent_name, child_name in relations:
if parent_name not in node_dict:
parent_node = node_type(parent_name)
node_dict[parent_name] = parent_node
else:
parent_node = node_dict[parent_name]
if child_name not in node_dict:
child_node = node_type(child_name)
node_dict[child_name] = child_node
else:
child_node = node_dict[child_name]

child_node.parents = [parent_node]

return parent_node


@optional_dependencies_pandas
def dict_to_dag(
relation_attrs: Dict[str, Any],
parent_key: str = "parents",
Expand Down Expand Up @@ -83,22 +95,36 @@ def dict_to_dag(
Returns:
(DAGNode)
"""
assert_dictionary_not_empty(relation_attrs, "relation_attrs")
assert_length_not_empty(relation_attrs, "Dictionary", "relation_attrs")

node_dict: Dict[str, DAGNode] = dict()
parent_node: DAGNode | None = None

for child_name, node_attrs in relation_attrs.items():
node_attrs = node_attrs.copy()
parent_names: List[str] = []
if parent_key in node_attrs:
parent_names = node_attrs.pop(parent_key)
assert_key_not_in_dict_or_df(node_attrs, ["parent", "parents", "children"])

if child_name in node_dict:
child_node = node_dict[child_name]
child_node.set_attrs(node_attrs)
else:
child_node = node_type(child_name, **node_attrs)
node_dict[child_name] = child_node

for parent_name in parent_names:
parent_node = node_dict.get(parent_name, node_type(parent_name))
node_dict[parent_name] = parent_node
child_node.parents = [parent_node]

# Convert dictionary to dataframe
data = pd.DataFrame(relation_attrs).T.rename_axis("_tmp_child").reset_index()
if parent_key not in data:
if parent_node is None:
raise ValueError(
f"Parent key {parent_key} not in dictionary, check `relation_attrs` and `parent_key`"
)

data = data.explode(parent_key)
return dataframe_to_dag(
data,
child_col="_tmp_child",
parent_col=parent_key,
node_type=node_type,
)
return parent_node


@optional_dependencies_pandas
Expand Down Expand Up @@ -164,6 +190,7 @@ def dataframe_to_dag(
attribute_cols = list(data.columns)
attribute_cols.remove(child_col)
attribute_cols.remove(parent_col)
assert_key_not_in_dict_or_df(attribute_cols, ["parent", "parents", "children"])

data = data[[child_col, parent_col] + attribute_cols].copy()

Expand Down
6 changes: 2 additions & 4 deletions bigtree/dag/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Any, Dict, List, Tuple, TypeVar, Union

from bigtree.node.dagnode import DAGNode
from bigtree.utils.assertions import assert_tree_type
from bigtree.utils.exceptions import (
optional_dependencies_image,
optional_dependencies_pandas,
Expand Down Expand Up @@ -265,10 +266,7 @@ def dag_to_dot(
dag = [dag]

for _dag in dag:
if not isinstance(_dag, DAGNode):
raise TypeError(
"Tree should be of type `DAGNode`, or inherit from `DAGNode`"
)
assert_tree_type(_dag, DAGNode, "DAGNode")
_dag = _dag.copy()

for parent_node, child_node in dag_iterator(_dag):
Expand Down
9 changes: 4 additions & 5 deletions bigtree/tree/construct.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
assert_dataframe_no_duplicate_attribute,
assert_dataframe_no_duplicate_children,
assert_dataframe_not_empty,
assert_dictionary_not_empty,
assert_length_not_empty,
filter_attributes,
isnull,
Expand Down Expand Up @@ -185,7 +184,7 @@ def add_dict_to_tree_by_path(
Returns:
(Node)
"""
assert_dictionary_not_empty(path_attrs, "path_attrs")
assert_length_not_empty(path_attrs, "Dictionary", "path_attrs")

root_node = tree.root

Expand Down Expand Up @@ -232,7 +231,7 @@ def add_dict_to_tree_by_name(tree: Node, name_attrs: Dict[str, Dict[str, Any]])
"""
from bigtree.tree.search import findall

assert_dictionary_not_empty(name_attrs, "name_attrs")
assert_length_not_empty(name_attrs, "Dictionary", "name_attrs")

attr_dict_names = set(name_attrs.keys())

Expand Down Expand Up @@ -642,7 +641,7 @@ def dict_to_tree(
Returns:
(Node)
"""
assert_dictionary_not_empty(path_attrs, "path_attrs")
assert_length_not_empty(path_attrs, "Dictionary", "path_attrs")

# Initial tree
root_name = list(path_attrs.keys())[0].lstrip(sep).rstrip(sep).split(sep)[0]
Expand Down Expand Up @@ -724,7 +723,7 @@ def nested_dict_to_tree(
Returns:
(Node)
"""
assert_dictionary_not_empty(node_attrs, "node_attrs")
assert_length_not_empty(node_attrs, "Dictionary", "node_attrs")

def _recursive_add_child(
child_dict: Dict[str, Any], parent_node: Optional[Node] = None
Expand Down
4 changes: 2 additions & 2 deletions bigtree/tree/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
assert_key_in_dict,
assert_str_in_list,
assert_style_in_dict,
assert_tree_type,
isnull,
)
from bigtree.utils.constants import ExportConstants, MermaidConstants, NewickCharacter
Expand Down Expand Up @@ -1223,8 +1224,7 @@ def tree_to_dot(
tree = [tree]

for _tree in tree:
if not isinstance(_tree, Node):
raise TypeError("Tree should be of type `Node`, or inherit from `Node`")
assert_tree_type(_tree, Node, "Node")

name_dict: Dict[str, List[str]] = collections.defaultdict(list)

Expand Down
4 changes: 2 additions & 2 deletions bigtree/tree/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from bigtree.tree.construct import add_dict_to_tree_by_path, dataframe_to_tree
from bigtree.tree.export import tree_to_dataframe
from bigtree.tree.search import find_path
from bigtree.utils.assertions import assert_tree_type
from bigtree.utils.exceptions import NotFoundError
from bigtree.utils.iterators import levelordergroup_iter

Expand Down Expand Up @@ -34,8 +35,7 @@ def clone_tree(tree: BaseNode, node_type: Type[BaseNodeT]) -> BaseNodeT:
Returns:
(BaseNode)
"""
if not isinstance(tree, BaseNode):
raise TypeError("Tree should be of type `BaseNode`, or inherit from `BaseNode`")
assert_tree_type(tree, BaseNode, "BaseNode")

# Start from root
root_info = dict(tree.root.describe(exclude_prefix="_"))
Expand Down
81 changes: 60 additions & 21 deletions bigtree/utils/assertions.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
from __future__ import annotations

import math
from typing import Any, Dict, List, Union
from typing import TYPE_CHECKING, Any, Dict, List, Sized, Type, Union

try:
if TYPE_CHECKING:
import pandas as pd
except ImportError: # pragma: no cover
pd = None

from bigtree.node.basenode import BaseNode
from bigtree.node.dagnode import DAGNode
from bigtree.node.node import Node


__all__ = [
"assert_style_in_dict",
"assert_str_in_list",
"assert_key_in_dict",
"assert_length_not_empty",
"assert_dataframe_not_empty",
"assert_dataframe_no_duplicate_attribute",
"assert_dataframe_no_duplicate_children",
"assert_tree_type",
"isnull",
"filter_attributes",
]


def assert_style_in_dict(
Expand Down Expand Up @@ -43,6 +58,23 @@ def assert_str_in_list(
)


def assert_key_not_in_dict_or_df(
    parameter_dict: Union[Dict[str, Any], List[str], pd.DataFrame],
    not_accepted_parameters: List[str],
) -> None:
    """Raise ValueError if any key of the input is a reserved keyword

    The input is only iterated over, so the check applies to whatever iteration yields:
    the keys of a dictionary, the items of a list of names (such as a list of dataframe
    column names), or the column labels of a dataframe.

    Args:
        parameter_dict (Dict[str, Any]/List[str]/pd.DataFrame): argument input for parameter
        not_accepted_parameters (List[str]): list of not accepted parameters

    Raises:
        ValueError: for the first key encountered that is in `not_accepted_parameters`
    """
    for parameter in parameter_dict:
        if parameter in not_accepted_parameters:
            raise ValueError(
                f"Invalid input, check `{parameter}` is not a valid key as it is a reserved keyword"
            )


def assert_key_in_dict(
parameter_name: str,
parameter: Any,
Expand All @@ -61,13 +93,11 @@ def assert_key_in_dict(
)


def assert_length_not_empty(
data: Union[str, List[Any]], argument_name: str, argument: str
) -> None:
"""Raise ValueError if data (str, list, or iterable) does not have length
def assert_length_not_empty(data: Sized, argument_name: str, argument: str) -> None:
"""Raise ValueError if data does not have length
Args:
data (str/List[Any]): data to check
data (Sized): data to check
argument_name: argument name for data, for error message
argument (str): argument for data, for error message
"""
Expand All @@ -77,17 +107,6 @@ def assert_length_not_empty(
)


def assert_dictionary_not_empty(data_dict: Dict[Any, Any], argument: str) -> None:
    """Raise ValueError if dictionary is empty

    Args:
        data_dict (Dict[Any, Any]): dictionary to check
        argument (str): argument for dictionary, for error message

    Raises:
        ValueError: when `data_dict` has a length of zero
    """
    if len(data_dict) > 0:
        return
    raise ValueError(f"Dictionary does not contain any data, check `{argument}`")


def assert_dataframe_not_empty(data: pd.DataFrame) -> None:
"""Raise ValueError is dataframe is empty
Expand Down Expand Up @@ -158,6 +177,24 @@ def assert_dataframe_no_duplicate_children(
)


def assert_tree_type(
    tree: Union[BaseNode, Node, DAGNode],
    tree_type: Union[Type[BaseNode], Type[Node], Type[DAGNode]],
    tree_type_name: str,
) -> None:
    """Raise TypeError if tree is not an instance of `tree_type`

    Args:
        tree (Union["BaseNode", "Node", "DAGNode"]): tree to check
        tree_type: tree type to assert for
        tree_type_name (str): tree type name, for error message

    Raises:
        TypeError: when `tree` fails the `isinstance` check against `tree_type`
    """
    # Guard clause: instances of the type (or of a subclass) pass silently
    if isinstance(tree, tree_type):
        return

    error_message = f"Tree should be of type `{tree_type_name}`, or inherit from `{tree_type_name}`"
    raise TypeError(error_message)


def isnull(value: Any) -> bool:
"""Check if value is null
Expand All @@ -167,6 +204,8 @@ def isnull(value: Any) -> bool:
Returns:
(bool)
"""
import math

if not value or (isinstance(value, float) and math.isnan(value)):
return True
return False
Expand Down
6 changes: 3 additions & 3 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,9 @@ theme:
plugins:
- glightbox
- search
- social:
cards_layout_options:
logo: docs/_static/favicon.svg
# - social:
# cards_layout_options:
# logo: docs/_static/favicon.svg
- mkdocstrings:
handlers:
python:
Expand Down
Loading

0 comments on commit 34b6cb9

Please sign in to comment.