@@ -439,7 +439,7 @@ def get_tree_diff(
439
439
indicator_col = "Exists"
440
440
old_suffix = "_old"
441
441
new_suffix = "_new"
442
- tree_sep = tree . sep
442
+ moved_ind = "moved_ind"
443
443
444
444
data , data_other = (
445
445
export .tree_to_dataframe (
@@ -475,32 +475,46 @@ def get_tree_diff(
475
475
data_path_diff = data_compare
476
476
477
477
# Handle tree structure difference
478
- paths_removed = list (
479
- data_path_diff [data_path_diff [indicator_col ] == "left_only" ][path_col ]
480
- )[::- 1 ]
481
- paths_added = list (
482
- data_path_diff [data_path_diff [indicator_col ] == "right_only" ][path_col ]
483
- )[::- 1 ]
484
-
485
- moved_from_ind : List [bool ] = [True for _ in range (len (paths_removed ))]
486
- moved_to_ind : List [bool ] = [True for _ in range (len (paths_added ))]
478
+ data_tree = data_path_diff [data_path_diff [indicator_col ] == "left_only" ]
479
+ data_tree_other = data_path_diff [data_path_diff [indicator_col ] == "right_only" ]
480
+
487
481
if detail :
488
- names_removed = [path .split (tree_sep )[- 1 ] for path in paths_removed ]
489
- names_added = [path .split (tree_sep )[- 1 ] for path in paths_added ]
490
- moved_from_ind = [name in names_added for name in names_removed ]
491
- moved_to_ind = [name in names_removed for name in names_added ]
492
-
493
- path_removed_to_suffix = {
494
- path : "-" if not detail else ("moved from" if move_ind else "removed" )
495
- for path , move_ind in zip (paths_removed , moved_from_ind )
496
- }
497
- path_added_to_suffix = {
498
- path : "+" if not detail else ("moved to" if move_ind else "added" )
499
- for path , move_ind in zip (paths_added , moved_to_ind )
500
- }
482
+ data_tree [moved_ind ] = False
483
+ data_tree_other [moved_ind ] = False
484
+
485
+ if len (data_tree ) and len (data_tree_other ):
486
+ # Check for moved from and moved to
487
+ move_from_condition = data_tree [
488
+ data_tree [name_col ].isin (set (data_tree_other [name_col ]))
489
+ ]
490
+ data_tree .loc [move_from_condition .index , moved_ind ] = True
491
+ move_to_condition = data_tree_other [
492
+ data_tree_other [name_col ].isin (set (data_tree [name_col ]))
493
+ ]
494
+ data_tree_other .loc [move_to_condition .index , moved_ind ] = True
495
+
496
+ path_move_from = data_tree .set_index (path_col )[[moved_ind ]].to_dict (
497
+ orient = "index"
498
+ )
499
+ path_move_to = data_tree_other .set_index (path_col )[[moved_ind ]].to_dict (
500
+ orient = "index"
501
+ )
502
+ path_move_from_suffix = {
503
+ path : "moved from" if v [moved_ind ] else "removed"
504
+ for path , v in path_move_from .items ()
505
+ }
506
+ path_move_to_suffix = {
507
+ path : "moved to" if v [moved_ind ] else "added"
508
+ for path , v in path_move_to .items ()
509
+ }
510
+ else :
511
+ path_move_from_suffix = dict (zip (data_tree [path_col ], "-" * len (data_tree )))
512
+ path_move_to_suffix = dict (
513
+ zip (data_tree_other [path_col ], "+" * len (data_tree_other ))
514
+ )
501
515
502
516
# Check tree attribute difference
503
- dict_attr_diff : Dict [str , Dict [str , Any ]] = {}
517
+ path_attr_diff : Dict [str , Dict [str , Any ]] = {}
504
518
if attr_list :
505
519
data_both = data_compare [data_compare [indicator_col ] == "both" ]
506
520
condition_attr_diff = (
@@ -517,7 +531,7 @@ def get_tree_diff(
517
531
data_attr_diff = data_both [eval (condition_attr_diff )]
518
532
dict_attr_all = data_attr_diff .set_index (path_col ).to_dict (orient = "index" )
519
533
for path , node_attr in dict_attr_all .items ():
520
- dict_attr_diff [path ] = {
534
+ path_attr_diff [path ] = {
521
535
attr : (
522
536
node_attr [f"{ attr } { old_suffix } " ],
523
537
node_attr [f"{ attr } { new_suffix } " ],
@@ -531,24 +545,24 @@ def get_tree_diff(
531
545
if only_diff :
532
546
data_compare = data_compare [
533
547
(data_compare [indicator_col ] != "both" )
534
- | (data_compare [path_col ].isin (dict_attr_diff .keys ()))
548
+ | (data_compare [path_col ].isin (path_attr_diff .keys ()))
535
549
]
536
550
data_compare = data_compare [[path_col ]].sort_values (path_col )
537
551
if len (data_compare ):
538
552
tree_diff = construct .dataframe_to_tree (
539
553
data_compare , node_type = tree .__class__ , sep = tree .sep
540
554
)
541
- for path in sorted (path_removed_to_suffix , reverse = True ):
555
+ for path in sorted (path_move_from_suffix , reverse = True ):
542
556
_node = search .find_full_path (tree_diff , path )
543
- _node .name += f""" ({ path_removed_to_suffix [path ]} )"""
544
- for path in sorted (path_added_to_suffix , reverse = True ):
557
+ _node .name += f""" ({ path_move_from_suffix [path ]} )"""
558
+ for path in sorted (path_move_to_suffix , reverse = True ):
545
559
_node = search .find_full_path (tree_diff , path )
546
- _node .name += f""" ({ path_added_to_suffix [path ]} )"""
560
+ _node .name += f""" ({ path_move_to_suffix [path ]} )"""
547
561
548
562
# Handle tree attribute difference
549
- if dict_attr_diff :
550
- tree_diff = construct .add_dict_to_tree_by_path (tree_diff , dict_attr_diff )
551
- for path in sorted (dict_attr_diff , reverse = True ):
563
+ if path_attr_diff :
564
+ tree_diff = construct .add_dict_to_tree_by_path (tree_diff , path_attr_diff )
565
+ for path in sorted (path_attr_diff , reverse = True ):
552
566
_node = search .find_full_path (tree_diff , path )
553
567
_node .name += " (~)"
554
568
return tree_diff
0 commit comments