Skip to content

Commit ffb365f

Browse files
authored
Merge pull request #318 from kayjan/bugfix/tree-diff-substring
Fix get_tree_diff if path are substring of a modified path
2 parents 502290d + 3841978 commit ffb365f

File tree

3 files changed

+67
-12
lines changed

3 files changed

+67
-12
lines changed

CHANGELOG.md

+6-5
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1111
### Fixed:
1212
- Misc: Doctest for docstrings, docstring to indicate usage prefers `node_name` to `name`.
1313
- Tree Export: Mermaid diagram title to add newline.
14+
- Tree Helper: Get tree diff string replacement bug when the path change is substring of another path.
1415

1516
## [0.22.1] - 2024-11-03
1617
### Added:
@@ -21,7 +22,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2122

2223
## [0.22.0] - 2024-11-03
2324
### Added:
24-
- Tree Helper: Accept parameter `detail` to show the different types of shift e.g., moved / added / removed. By default it is false.
25+
- Tree Helper: Accept parameter `detail` to show the different types of shift e.g., moved / added / removed. By default
26+
it is false.
2527

2628
## [0.21.3] - 2024-10-16
2729
### Added:
@@ -96,10 +98,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
9698

9799
## [0.19.0] - 2024-06-15
98100
### Changed:
99-
- Tree Exporter: Print functions to accept custom style that is implemented as dataclass, this is a more
100-
object-oriented way of parsing arguments.
101-
This affects functions `print_tree`, `yield_tree`, `hprint_tree`, and `hyield_tree`.
102-
The argument `custom_style` is deprecated, and argument `style` is used instead.
101+
- Tree Exporter: Print functions to accept custom style that is implemented as dataclass, this is a more object-oriented
102+
way of parsing arguments. This affects functions `print_tree`, `yield_tree`, `hprint_tree`, and `hyield_tree`. The
103+
argument `custom_style` is deprecated, and argument `style` is used instead.
103104
**This might not be backwards-compatible!**
104105
- Misc: Update docstrings to be more comprehensive for tree constructor and tree exporter.
105106
- Misc: Update documentation badges and conda information.

bigtree/tree/helper.py

+44-7
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@
55
from bigtree.tree import construct, export, search
66
from bigtree.utils import assertions, exceptions, iterators
77

8+
try:
9+
import pandas as pd
10+
except ImportError: # pragma: no cover
11+
from unittest.mock import MagicMock
12+
13+
pd = MagicMock()
14+
815
__all__ = ["clone_tree", "get_subtree", "prune_tree", "get_tree_diff"]
916
BaseNodeT = TypeVar("BaseNodeT", bound=basenode.BaseNode)
1017
BinaryNodeT = TypeVar("BinaryNodeT", bound=binarynode.BinaryNode)
@@ -237,6 +244,7 @@ def prune_tree(
237244
return tree_copy
238245

239246

247+
@exceptions.optional_dependencies_pandas
240248
def get_tree_diff(
241249
tree: node.Node,
242250
other_tree: node.Node,
@@ -376,6 +384,7 @@ def get_tree_diff(
376384
name_col = "name"
377385
path_col = "PATH"
378386
indicator_col = "Exists"
387+
tree_sep = tree.sep
379388

380389
data, data_other = (
381390
export.tree_to_dataframe(
@@ -406,11 +415,12 @@ def get_tree_diff(
406415
moved_from_indicator: List[bool] = [True for _ in range(len(nodes_removed))]
407416
moved_to_indicator: List[bool] = [True for _ in range(len(nodes_added))]
408417
if detail:
409-
_sep = tree.sep
410418
node_names_removed = [
411-
node_removed.split(_sep)[-1] for node_removed in nodes_removed
419+
node_removed.split(tree_sep)[-1] for node_removed in nodes_removed
420+
]
421+
node_names_added = [
422+
node_added.split(tree_sep)[-1] for node_added in nodes_added
412423
]
413-
node_names_added = [node_added.split(_sep)[-1] for node_added in nodes_added]
414424
moved_from_indicator = [
415425
node_name_removed in node_names_added
416426
for node_name_removed in node_names_removed
@@ -420,15 +430,39 @@ def get_tree_diff(
420430
for node_name_added in node_names_added
421431
]
422432

433+
def add_suffix_to_path(
434+
_data: pd.DataFrame, _condition: pd.Series, _original_name: str, _suffix: str
435+
) -> pd.DataFrame:
436+
"""Add suffix to path string
437+
438+
Args:
439+
_data (pd.DataFrame): original data with path column
440+
_condition (pd.Series): whether to add suffix, contains True/False values
441+
_original_name (str): path prefix to add suffix to
442+
_suffix (str): suffix to add to path column
443+
444+
Returns:
445+
(pd.DataFrame)
446+
"""
447+
data_replace = _data[_condition]
448+
data_replace[path_col] = data_replace[path_col].str.replace(
449+
_original_name, f"{_original_name} ({suffix})", regex=True
450+
)
451+
data_not_replace = _data[~_condition]
452+
return data_replace._append(data_not_replace).sort_index()
453+
423454
for node_removed, move_indicator in zip(nodes_removed, moved_from_indicator):
424455
if not detail:
425456
suffix = "-"
426457
elif move_indicator:
427458
suffix = "moved from"
428459
else:
429460
suffix = "removed"
430-
data_both[path_col] = data_both[path_col].str.replace(
431-
node_removed, f"{node_removed} ({suffix})", regex=True
461+
condition_node_removed = data_both[path_col].str.endswith(
462+
node_removed
463+
) | data_both[path_col].str.contains(node_removed + tree_sep)
464+
data_both = add_suffix_to_path(
465+
data_both, condition_node_removed, node_removed, suffix
432466
)
433467
for node_added, move_indicator in zip(nodes_added, moved_to_indicator):
434468
if not detail:
@@ -437,8 +471,11 @@ def get_tree_diff(
437471
suffix = "moved to"
438472
else:
439473
suffix = "added"
440-
data_both[path_col] = data_both[path_col].str.replace(
441-
node_added, f"{node_added} ({suffix})", regex=True
474+
condition_node_added = data_both[path_col].str.endswith(node_added) | data_both[
475+
path_col
476+
].str.contains(node_added + tree_sep)
477+
data_both = add_suffix_to_path(
478+
data_both, condition_node_added, node_added, suffix
442479
)
443480

444481
# Check tree attribute difference

tests/tree/test_helper.py

+17
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,23 @@ def test_tree_diff(tree_node):
235235
)
236236
assert_print_statement(export.print_tree, expected_str, tree=tree_only_diff)
237237

238+
@staticmethod
239+
def test_tree_diff_same_prefix():
240+
tree_node = node.Node(
241+
"a", children=[node.Node("bb", children=[node.Node("b")])]
242+
)
243+
other_tree_node = node.Node("a", children=[node.Node("b")])
244+
tree_only_diff = helper.get_tree_diff(tree_node, other_tree_node)
245+
# fmt: off
246+
expected_str = (
247+
"a\n"
248+
"├── b (+)\n"
249+
"└── bb (-)\n"
250+
" └── b (-)\n"
251+
)
252+
# fmt: on
253+
assert_print_statement(export.print_tree, expected_str, tree=tree_only_diff)
254+
238255
@staticmethod
239256
def test_tree_diff_diff_sep_error(tree_node):
240257
other_tree_node = helper.prune_tree(tree_node, "a/c")

0 commit comments

Comments
 (0)