Skip to content

Commit da6793a

Browse files
authored
Merge pull request #329 from kayjan/feature/tree-diff-attr
Check for moved indicator via dataframe operations
2 parents 813ebca + 6d777ab commit da6793a

File tree

2 files changed

+48
-33
lines changed

2 files changed

+48
-33
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
88
### Changed:
99
- Tree Helper: Make get tree diff logic faster by comparing all attributes in the attribute list and the data at once (for attr diff).
1010
- Tree Helper: Make get tree diff logic faster by adding the suffix at the end (for path diff).
11+
- Tree Helper: Make get tree diff logic faster by detecting the moved indicator using dataframe operations (for detail).
1112

1213
## [0.22.2] - 2024-11-11
1314
### Added:

bigtree/tree/helper.py

+47-33
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ def get_tree_diff(
439439
indicator_col = "Exists"
440440
old_suffix = "_old"
441441
new_suffix = "_new"
442-
tree_sep = tree.sep
442+
moved_ind = "moved_ind"
443443

444444
data, data_other = (
445445
export.tree_to_dataframe(
@@ -475,32 +475,46 @@ def get_tree_diff(
475475
data_path_diff = data_compare
476476

477477
# Handle tree structure difference
478-
paths_removed = list(
479-
data_path_diff[data_path_diff[indicator_col] == "left_only"][path_col]
480-
)[::-1]
481-
paths_added = list(
482-
data_path_diff[data_path_diff[indicator_col] == "right_only"][path_col]
483-
)[::-1]
484-
485-
moved_from_ind: List[bool] = [True for _ in range(len(paths_removed))]
486-
moved_to_ind: List[bool] = [True for _ in range(len(paths_added))]
478+
data_tree = data_path_diff[data_path_diff[indicator_col] == "left_only"]
479+
data_tree_other = data_path_diff[data_path_diff[indicator_col] == "right_only"]
480+
487481
if detail:
488-
names_removed = [path.split(tree_sep)[-1] for path in paths_removed]
489-
names_added = [path.split(tree_sep)[-1] for path in paths_added]
490-
moved_from_ind = [name in names_added for name in names_removed]
491-
moved_to_ind = [name in names_removed for name in names_added]
492-
493-
path_removed_to_suffix = {
494-
path: "-" if not detail else ("moved from" if move_ind else "removed")
495-
for path, move_ind in zip(paths_removed, moved_from_ind)
496-
}
497-
path_added_to_suffix = {
498-
path: "+" if not detail else ("moved to" if move_ind else "added")
499-
for path, move_ind in zip(paths_added, moved_to_ind)
500-
}
482+
data_tree[moved_ind] = False
483+
data_tree_other[moved_ind] = False
484+
485+
if len(data_tree) and len(data_tree_other):
486+
# Check for moved from and moved to
487+
move_from_condition = data_tree[
488+
data_tree[name_col].isin(set(data_tree_other[name_col]))
489+
]
490+
data_tree.loc[move_from_condition.index, moved_ind] = True
491+
move_to_condition = data_tree_other[
492+
data_tree_other[name_col].isin(set(data_tree[name_col]))
493+
]
494+
data_tree_other.loc[move_to_condition.index, moved_ind] = True
495+
496+
path_move_from = data_tree.set_index(path_col)[[moved_ind]].to_dict(
497+
orient="index"
498+
)
499+
path_move_to = data_tree_other.set_index(path_col)[[moved_ind]].to_dict(
500+
orient="index"
501+
)
502+
path_move_from_suffix = {
503+
path: "moved from" if v[moved_ind] else "removed"
504+
for path, v in path_move_from.items()
505+
}
506+
path_move_to_suffix = {
507+
path: "moved to" if v[moved_ind] else "added"
508+
for path, v in path_move_to.items()
509+
}
510+
else:
511+
path_move_from_suffix = dict(zip(data_tree[path_col], "-" * len(data_tree)))
512+
path_move_to_suffix = dict(
513+
zip(data_tree_other[path_col], "+" * len(data_tree_other))
514+
)
501515

502516
# Check tree attribute difference
503-
dict_attr_diff: Dict[str, Dict[str, Any]] = {}
517+
path_attr_diff: Dict[str, Dict[str, Any]] = {}
504518
if attr_list:
505519
data_both = data_compare[data_compare[indicator_col] == "both"]
506520
condition_attr_diff = (
@@ -517,7 +531,7 @@ def get_tree_diff(
517531
data_attr_diff = data_both[eval(condition_attr_diff)]
518532
dict_attr_all = data_attr_diff.set_index(path_col).to_dict(orient="index")
519533
for path, node_attr in dict_attr_all.items():
520-
dict_attr_diff[path] = {
534+
path_attr_diff[path] = {
521535
attr: (
522536
node_attr[f"{attr}{old_suffix}"],
523537
node_attr[f"{attr}{new_suffix}"],
@@ -531,24 +545,24 @@ def get_tree_diff(
531545
if only_diff:
532546
data_compare = data_compare[
533547
(data_compare[indicator_col] != "both")
534-
| (data_compare[path_col].isin(dict_attr_diff.keys()))
548+
| (data_compare[path_col].isin(path_attr_diff.keys()))
535549
]
536550
data_compare = data_compare[[path_col]].sort_values(path_col)
537551
if len(data_compare):
538552
tree_diff = construct.dataframe_to_tree(
539553
data_compare, node_type=tree.__class__, sep=tree.sep
540554
)
541-
for path in sorted(path_removed_to_suffix, reverse=True):
555+
for path in sorted(path_move_from_suffix, reverse=True):
542556
_node = search.find_full_path(tree_diff, path)
543-
_node.name += f""" ({path_removed_to_suffix[path]})"""
544-
for path in sorted(path_added_to_suffix, reverse=True):
557+
_node.name += f""" ({path_move_from_suffix[path]})"""
558+
for path in sorted(path_move_to_suffix, reverse=True):
545559
_node = search.find_full_path(tree_diff, path)
546-
_node.name += f""" ({path_added_to_suffix[path]})"""
560+
_node.name += f""" ({path_move_to_suffix[path]})"""
547561

548562
# Handle tree attribute difference
549-
if dict_attr_diff:
550-
tree_diff = construct.add_dict_to_tree_by_path(tree_diff, dict_attr_diff)
551-
for path in sorted(dict_attr_diff, reverse=True):
563+
if path_attr_diff:
564+
tree_diff = construct.add_dict_to_tree_by_path(tree_diff, path_attr_diff)
565+
for path in sorted(path_attr_diff, reverse=True):
552566
_node = search.find_full_path(tree_diff, path)
553567
_node.name += " (~)"
554568
return tree_diff

0 commit comments

Comments
 (0)