Skip to content

Commit

Permalink
Merge pull request #310 from kayjan/feature/all-diff-detail
Browse files Browse the repository at this point in the history
Add detail to get_tree_diff
  • Loading branch information
kayjan authored Nov 2, 2024
2 parents cf32279 + 0961df6 commit 1f09696
Show file tree
Hide file tree
Showing 7 changed files with 206 additions and 9 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.22.0] - 2024-11-03
### Added:
- Tree Helper: `get_tree_diff` accepts parameter `detail` to show the type of difference, e.g., moved / added / removed. Defaults to False.

## [0.21.3] - 2024-10-16
### Added:
- Tree Node: Docstring indentation and additional information for Node creation.
Expand Down Expand Up @@ -668,7 +672,8 @@ ignore null attribute columns.
- Utility Iterator: Tree traversal methods.
- Workflow To Do App: Tree use case with to-do list implementation.

[Unreleased]: https://github.com/kayjan/bigtree/compare/0.21.3...HEAD
[Unreleased]: https://github.com/kayjan/bigtree/compare/0.22.0...HEAD
[0.22.0]: https://github.com/kayjan/bigtree/compare/0.21.3...0.22.0
[0.21.3]: https://github.com/kayjan/bigtree/compare/0.21.2...0.21.3
[0.21.2]: https://github.com/kayjan/bigtree/compare/0.21.1...0.21.2
[0.21.1]: https://github.com/kayjan/bigtree/compare/0.21.0...0.21.1
Expand Down
2 changes: 1 addition & 1 deletion bigtree/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.21.3"
__version__ = "0.22.0"

from bigtree.binarytree.construct import list_to_binarytree
from bigtree.dag.construct import dataframe_to_dag, dict_to_dag, list_to_dag
Expand Down
52 changes: 48 additions & 4 deletions bigtree/tree/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ def get_tree_diff(
tree: node.Node,
other_tree: node.Node,
only_diff: bool = True,
detail: bool = False,
attr_list: List[str] = [],
fallback_sep: str = "/",
) -> node.Node:
Expand All @@ -255,6 +256,9 @@ def get_tree_diff(
- For example: (+) refers to nodes that are in `other_tree` but not `tree`.
- For example: (-) refers to nodes that are in `tree` but not `other_tree`.
If `detail=True`, (added) and (moved to) will be used instead of (+), while (removed) and (moved from)
will be used instead of (-).
!!! note
- tree and other_tree must have the same `sep` symbol, otherwise this will raise ValueError
Expand Down Expand Up @@ -298,6 +302,15 @@ def get_tree_diff(
├── file1.doc
└── photo2.jpg (-)
>>> tree_diff = get_tree_diff(root, root_other, detail=True)
>>> tree_diff.show()
Downloads
├── Pictures
│ ├── photo1.jpg
│ └── photo2.jpg (moved to)
├── file1.doc
└── photo2.jpg (moved from)
Comparing tree attributes
- (~) will be added to node name if there are differences in tree attributes defined in `attr_list`.
Expand Down Expand Up @@ -339,6 +352,7 @@ def get_tree_diff(
tree (Node): tree to be compared against
other_tree (Node): tree to be compared with
only_diff (bool): indicator to show all nodes or only nodes that are different (+/-), defaults to True
detail (bool): indicator to differentiate between different types of diff e.g., added or removed or moved, defaults to False
attr_list (List[str]): tree attributes to check for difference, defaults to empty list
fallback_sep (str): sep to fall back to if tree and other_tree has sep that clashes with symbols "+" / "-" / "~".
All node names in tree and other_tree should not contain this fallback_sep, defaults to "/"
Expand Down Expand Up @@ -388,13 +402,43 @@ def get_tree_diff(
nodes_added = list(data_both[data_both[indicator_col] == "right_only"][path_col])[
::-1
]
for node_removed in nodes_removed:

moved_from_indicator: List[bool] = [True for _ in range(len(nodes_removed))]
moved_to_indicator: List[bool] = [True for _ in range(len(nodes_added))]
if detail:
_sep = tree.sep
node_names_removed = [
node_removed.split(_sep)[-1] for node_removed in nodes_removed
]
node_names_added = [node_added.split(_sep)[-1] for node_added in nodes_added]
moved_from_indicator = [
node_name_removed in node_names_added
for node_name_removed in node_names_removed
]
moved_to_indicator = [
node_name_added in node_names_removed
for node_name_added in node_names_added
]

for node_removed, move_indicator in zip(nodes_removed, moved_from_indicator):
if not detail:
suffix = "-"
elif move_indicator:
suffix = "moved from"
else:
suffix = "removed"
data_both[path_col] = data_both[path_col].str.replace(
node_removed, f"{node_removed} (-)", regex=True
node_removed, f"{node_removed} ({suffix})", regex=True
)
for node_added in nodes_added:
for node_added, move_indicator in zip(nodes_added, moved_to_indicator):
if not detail:
suffix = "+"
elif move_indicator:
suffix = "moved to"
else:
suffix = "added"
data_both[path_col] = data_both[path_col].str.replace(
node_added, f"{node_added} (+)", regex=True
node_added, f"{node_added} ({suffix})", regex=True
)

# Check tree attribute difference
Expand Down
37 changes: 36 additions & 1 deletion docs/gettingstarted/demo/tree.md
Original file line number Diff line number Diff line change
Expand Up @@ -954,8 +954,11 @@ To compare tree attributes:
- `(-)`: Node is removed in second tree
- `(~)`: Node has different attributes, only available when comparing attributes

For more detail, pass `detail=True` to show `(moved from)`, `(moved to)`, `(added)`,
and `(removed)` instead of `(+)` and `(-)`.

=== "Only differences"
```python hl_lines="20 29"
```python hl_lines="20"
from bigtree import str_to_tree, get_tree_diff

root = str_to_tree("""
Expand Down Expand Up @@ -1015,6 +1018,38 @@ To compare tree attributes:
# ├── f (-)
# └── g (+)
```
=== "With details"
```python hl_lines="21"
from bigtree import str_to_tree, get_tree_diff

root = str_to_tree("""
a
├── b
│ ├── d
│ └── e
└── c
└── f
""")

root_other = str_to_tree("""
a
├── b
│ └── g
└── c
├── d
└── f
""")

tree_diff = get_tree_diff(root, root_other, detail=True)
tree_diff.show()
# a
# ├── b
# │ ├── d (moved from)
# │ ├── e (removed)
# │ └── g (added)
# └── c
# └── d (moved to)
```
=== "Attribute difference"
```python hl_lines="25"
from bigtree import Node, get_tree_diff
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ dependencies = [
]

[tool.hatch.envs.default.scripts]
cov = "pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=bigtree {args} --benchmark-autosave --benchmark-histogram=.benchmarks/histogram --benchmark-json .benchmarks/output.json"
cov = "pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=bigtree {args}"
# Run the `cov` script with the pytest-benchmark options appended as extra arguments.
# (No `&&` here: that would make the shell execute `--benchmark-autosave` as its own command.)
cov-benchmark = "cov --benchmark-autosave --benchmark-histogram=.benchmarks/histogram --benchmark-json .benchmarks/output.json"
no-cov = "test && coverage report --show-missing --omit='*/workflows/*' --benchmark-autosave --benchmark-histogram=.benchmarks/histogram --benchmark-json .benchmarks/output.json"
test = "pytest . {args}"
lint = "black -l 88 ."
Expand Down
5 changes: 5 additions & 0 deletions tests/tree/test_export.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import io
import unittest

import pandas as pd
import polars as pl
Expand Down Expand Up @@ -1140,6 +1141,10 @@ def test_tree_to_polars_name_col_missing(tree_node):
assert expected.equals(actual)

@staticmethod
@unittest.skipIf(
tuple(map(int, pl.__version__.split(".")[:2])) > (1, 9),
reason="Not compatible with polars>1.9.0",
)
def test_tree_to_polars_name_path_col_missing(tree_node):
expected = pl.DataFrame()
expected.index = range(8)
Expand Down
109 changes: 108 additions & 1 deletion tests/tree/test_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest

from bigtree.node import basenode, node
from bigtree.tree import export, helper
from bigtree.tree import export, helper, modify
from bigtree.utils import exceptions
from tests.conftest import assert_print_statement
from tests.node.test_basenode import (
Expand Down Expand Up @@ -288,6 +288,22 @@ def test_tree_diff_forbidden_sep(tree_node):
)
assert_print_statement(export.print_tree, expected_str, tree=tree_only_diff)

@staticmethod
def test_tree_diff_detail(tree_node):
# Checks get_tree_diff(detail=True) when nodes are only removed or added (no node name
# appears on both sides), so the labels are "(removed)" / "(added)" rather than "(-)" / "(+)".
# The other tree is `tree_node` pruned to "a/c" with a new node "i" attached to it.
other_tree_node = helper.prune_tree(tree_node, "a/c")
_ = node.Node("i", parent=other_tree_node)
# only_diff is left at its default (True), so the unchanged "c" branch is not expected below.
tree_only_diff = helper.get_tree_diff(tree_node, other_tree_node, detail=True)
expected_str = (
"a\n"
"├── b (removed)\n"
"│ ├── d (removed)\n"
"│ └── e (removed)\n"
"│ ├── g (removed)\n"
"│ └── h (removed)\n"
"└── i (added)\n"
)
assert_print_statement(export.print_tree, expected_str, tree=tree_only_diff)

@staticmethod
def test_tree_diff_all_diff(tree_node):
other_tree_node = helper.prune_tree(tree_node, "a/c")
Expand All @@ -306,6 +322,50 @@ def test_tree_diff_all_diff(tree_node):
)
assert_print_statement(export.print_tree, expected_str, tree=tree_diff)

@staticmethod
def test_tree_diff_all_diff_detail(tree_node):
# Same scenario as the detail test with only removals/additions, but with only_diff=False:
# unchanged nodes ("c" and "f") are expected in the output without any suffix.
other_tree_node = helper.prune_tree(tree_node, "a/c")
_ = node.Node("i", parent=other_tree_node)
tree_diff = helper.get_tree_diff(
tree_node, other_tree_node, only_diff=False, detail=True
)
expected_str = (
"a\n"
"├── b (removed)\n"
"│ ├── d (removed)\n"
"│ └── e (removed)\n"
"│ ├── g (removed)\n"
"│ └── h (removed)\n"
"├── c\n"
"│ └── f\n"
"└── i (added)\n"
)
assert_print_statement(export.print_tree, expected_str, tree=tree_diff)

@staticmethod
def test_tree_diff_detail_move(tree_node):
# Checks the "(moved from)" / "(moved to)" labels produced by get_tree_diff(detail=True).
# Work on a copy so the original fixture tree stays as the comparison baseline.
other_tree_node = tree_node.copy()
# NOTE(review): presumably a to_path of None deletes "a/b/d" while "a/b" is relocated to
# "a/c/b" — confirm against shift_nodes; the expected output below is consistent with that.
modify.shift_nodes(
other_tree_node, from_paths=["a/b/d", "a/b"], to_paths=[None, "a/c/b"]
)
_ = node.Node("i", parent=other_tree_node)
tree_only_diff = helper.get_tree_diff(tree_node, other_tree_node, detail=True)
# Names present on both the removed and added side (b, e, g, h) are expected as moved;
# "d" exists only in the first tree (removed) and "i" only in the second (added).
expected_str = (
"a\n"
"├── b (moved from)\n"
"│ ├── d (removed)\n"
"│ └── e (moved from)\n"
"│ ├── g (moved from)\n"
"│ └── h (moved from)\n"
"├── c\n"
"│ └── b (moved to)\n"
"│ └── e (moved to)\n"
"│ ├── g (moved to)\n"
"│ └── h (moved to)\n"
"└── i (added)\n"
)
assert_print_statement(export.print_tree, expected_str, tree=tree_only_diff)

@staticmethod
def test_tree_diff_new_leaf(tree_node):
other_tree_node = tree_node.copy()
Expand Down Expand Up @@ -575,6 +635,53 @@ def test_tree_diff_attributes_different_structure_different_attributes_all_diff(
actual = export.tree_to_dict(tree_diff, all_attrs=True)
assert actual == expected, f"Expected\n{expected}\nReceived\n{actual}"

@staticmethod
def test_tree_diff_attributes_different_structure_different_attributes_all_diff_detail(
tree_node,
):
# Checks get_tree_diff(only_diff=False, detail=True) on a copy that differs from the
# original both in structure (node "d" detached) and in attributes ("age" of "c" and "f").
from bigtree import find_name

tree_node_copy = tree_node.copy()
# Structural difference: detach "d" from the copied tree.
for node_name_to_remove in ["d"]:
node_to_remove = find_name(tree_node_copy, node_name_to_remove)
node_to_remove.parent = None
# Attribute difference: increase the age of "c" and "f" by 10 in the copied tree.
for node_name_to_change in ["c", "f"]:
node_to_change = find_name(tree_node_copy, node_name_to_change)
node_to_change.age += 10

# Without attributes
# No attr_list is passed, so only the structural change ("d (removed)") is expected.
expected_str = (
"a\n"
"├── b\n"
"│ ├── d (removed)\n"
"│ └── e\n"
"│ ├── g\n"
"│ └── h\n"
"└── c\n"
" └── f\n"
)
tree_diff = helper.get_tree_diff(
tree_node, tree_node_copy, only_diff=False, detail=True
)
assert_print_statement(export.print_tree, expected_str, tree=tree_diff)

# With attributes
# With attr_list=["age"], nodes whose age differs are expected to carry the "(~)" suffix
# and an "age" tuple holding the value from each tree.
expected = {
"/a": {"name": "a"},
"/a/b": {"name": "b"},
"/a/b/d (removed)": {"name": "d (removed)"},
"/a/b/e": {"name": "e"},
"/a/b/e/g": {"name": "g"},
"/a/b/e/h": {"name": "h"},
"/a/c (~)": {"age": (60, 70.0), "name": "c (~)"},
"/a/c (~)/f (~)": {"age": (38, 48.0), "name": "f (~)"},
}
tree_diff = helper.get_tree_diff(
tree_node, tree_node_copy, only_diff=False, detail=True, attr_list=["age"]
)
actual = export.tree_to_dict(tree_diff, all_attrs=True)
assert actual == expected, f"Expected\n{expected}\nReceived\n{actual}"

@staticmethod
def test_tree_diff_attributes_invalid_attribute(tree_node):
from bigtree import find_name
Expand Down

0 comments on commit 1f09696

Please sign in to comment.