5
5
from bigtree .tree import construct , export , search
6
6
from bigtree .utils import assertions , exceptions , iterators
7
7
8
+ try :
9
+ import pandas as pd
10
+ except ImportError : # pragma: no cover
11
+ from unittest .mock import MagicMock
12
+
13
+ pd = MagicMock ()
14
+
8
15
__all__ = ["clone_tree" , "get_subtree" , "prune_tree" , "get_tree_diff" ]
9
16
BaseNodeT = TypeVar ("BaseNodeT" , bound = basenode .BaseNode )
10
17
BinaryNodeT = TypeVar ("BinaryNodeT" , bound = binarynode .BinaryNode )
@@ -237,6 +244,7 @@ def prune_tree(
237
244
return tree_copy
238
245
239
246
247
+ @exceptions .optional_dependencies_pandas
240
248
def get_tree_diff (
241
249
tree : node .Node ,
242
250
other_tree : node .Node ,
@@ -376,6 +384,7 @@ def get_tree_diff(
376
384
name_col = "name"
377
385
path_col = "PATH"
378
386
indicator_col = "Exists"
387
+ tree_sep = tree .sep
379
388
380
389
data , data_other = (
381
390
export .tree_to_dataframe (
@@ -406,11 +415,12 @@ def get_tree_diff(
406
415
moved_from_indicator : List [bool ] = [True for _ in range (len (nodes_removed ))]
407
416
moved_to_indicator : List [bool ] = [True for _ in range (len (nodes_added ))]
408
417
if detail :
409
- _sep = tree .sep
410
418
node_names_removed = [
411
- node_removed .split (_sep )[- 1 ] for node_removed in nodes_removed
419
+ node_removed .split (tree_sep )[- 1 ] for node_removed in nodes_removed
420
+ ]
421
+ node_names_added = [
422
+ node_added .split (tree_sep )[- 1 ] for node_added in nodes_added
412
423
]
413
- node_names_added = [node_added .split (_sep )[- 1 ] for node_added in nodes_added ]
414
424
moved_from_indicator = [
415
425
node_name_removed in node_names_added
416
426
for node_name_removed in node_names_removed
@@ -420,15 +430,39 @@ def get_tree_diff(
420
430
for node_name_added in node_names_added
421
431
]
422
432
433
def add_suffix_to_path(
    _data: pd.DataFrame, _condition: pd.Series, _original_name: str, _suffix: str
) -> pd.DataFrame:
    """Add suffix to path string

    For every row selected by `_condition`, each occurrence of `_original_name`
    in the path column is rewritten to `"{_original_name} ({_suffix})"`. Rows not
    selected are returned unchanged. The path column name is taken from
    `path_col` in the enclosing scope.

    Args:
        _data (pd.DataFrame): original data with path column
        _condition (pd.Series): whether to add suffix, contains True/False values
        _original_name (str): path prefix to add suffix to, matched as literal text
        _suffix (str): suffix to add to path column

    Returns:
        (pd.DataFrame): copy of `_data` with suffixed paths, sorted by index
    """
    # Work on a copy: the caller's frame stays untouched and the assignment below
    # does not write into a filtered slice (no chained-assignment warning).
    data_suffixed = _data.copy()
    # Use the `_suffix` argument (not a variable that happens to exist in the
    # enclosing scope), and match the path literally: node names may contain
    # regex metacharacters such as ".", "+" or "(".
    data_suffixed.loc[_condition, path_col] = data_suffixed.loc[
        _condition, path_col
    ].str.replace(_original_name, f"{_original_name} ({_suffix})", regex=False)
    # Keep the index-sorted row order that callers received before.
    return data_suffixed.sort_index()
453
+
423
454
for node_removed , move_indicator in zip (nodes_removed , moved_from_indicator ):
424
455
if not detail :
425
456
suffix = "-"
426
457
elif move_indicator :
427
458
suffix = "moved from"
428
459
else :
429
460
suffix = "removed"
430
- data_both [path_col ] = data_both [path_col ].str .replace (
431
- node_removed , f"{ node_removed } ({ suffix } )" , regex = True
461
+ condition_node_removed = data_both [path_col ].str .endswith (
462
+ node_removed
463
+ ) | data_both [path_col ].str .contains (node_removed + tree_sep )
464
+ data_both = add_suffix_to_path (
465
+ data_both , condition_node_removed , node_removed , suffix
432
466
)
433
467
for node_added , move_indicator in zip (nodes_added , moved_to_indicator ):
434
468
if not detail :
@@ -437,8 +471,11 @@ def get_tree_diff(
437
471
suffix = "moved to"
438
472
else :
439
473
suffix = "added"
440
- data_both [path_col ] = data_both [path_col ].str .replace (
441
- node_added , f"{ node_added } ({ suffix } )" , regex = True
474
+ condition_node_added = data_both [path_col ].str .endswith (node_added ) | data_both [
475
+ path_col
476
+ ].str .contains (node_added + tree_sep )
477
+ data_both = add_suffix_to_path (
478
+ data_both , condition_node_added , node_added , suffix
442
479
)
443
480
444
481
# Check tree attribute difference
0 commit comments