From 6dade46b451f11ab2601f94ce7e892fd5309552c Mon Sep 17 00:00:00 2001 From: Michael Richard McKinsey Date: Fri, 8 Sep 2023 16:42:31 -0500 Subject: [PATCH 1/5] Improve compatibility in axis=column case. Allow inner index to be other than 'profile' --- thicket/ensemble.py | 42 ++++++++++++++++++++++++------------------ thicket/utils.py | 3 ++- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/thicket/ensemble.py b/thicket/ensemble.py index c129175a..7f081f9e 100644 --- a/thicket/ensemble.py +++ b/thicket/ensemble.py @@ -76,9 +76,10 @@ def _check_structures(): """Check that the structures of the thicket objects are valid for the incoming operations.""" # Required/expected format of the data for th in thickets: - verify_thicket_structures(th.dataframe, index=["node", "profile"]) + assert th.dataframe.index.nlevels == 2 + assert th.metadata.index.nlevels == 1 + assert th.dataframe.index.names[1] == th.metadata.index.name verify_thicket_structures(th.statsframe.dataframe, index=["node"]) - verify_thicket_structures(th.metadata, index=["profile"]) # Check for metadata_key in metadata if metadata_key: for th in thickets: @@ -88,10 +89,10 @@ def _check_structures(): for i in range(len(thickets) - 1): if len(thickets[i].profile) != len(thickets[i + 1].profile): raise ValueError( - "Length of all thicket profiles must match if 'metadata_key' is not provided. {} != {}".format( - len(thickets[i].profile), len(thickets[i + 1].profile) + f"Length of all thicket profiles must match if 'metadata_key' is not provided. {len(thickets[i].profile)} != {len(thickets[i + 1].profile)}" ) - ) + if metadata_key != th.metadata.index.name: + verify_thicket_structures(th.metadata, columns=[metadata_key]) # Ensure all thickets profiles are sorted. Must be true when metadata_key=None to # guarantee performance data table and metadata table match up. 
if metadata_key is None: @@ -120,14 +121,16 @@ def _create_multiindex_columns(df, upper_idx_name): def _handle_metadata(): """Handle operations to create new concatenated columnar axis metadata table.""" # Update index to reflect performance data table index - for i in range(len(thickets_cp)): - thickets_cp[i].metadata.reset_index(drop=True, inplace=True) + if metadata_key != inner_idx: + for i in range(len(thickets_cp)): + thickets_cp[i].metadata.reset_index(drop=True, inplace=True) if metadata_key is None: for i in range(len(thickets_cp)): thickets_cp[i].metadata.index.set_names("profile", inplace=True) else: for i in range(len(thickets_cp)): - thickets_cp[i].metadata.set_index(metadata_key, inplace=True) + if metadata_key != inner_idx: + thickets_cp[i].metadata.set_index(metadata_key, inplace=True) thickets_cp[i].metadata.sort_index(inplace=True) # Create multi-index columns @@ -179,17 +182,17 @@ def _handle_perfdata(): thickets_cp[i].metadata_column_to_perfdata( "new_profiles", drop=True ) - thickets_cp[i].dataframe.reset_index(level="profile", inplace=True) + thickets_cp[i].dataframe.reset_index(level=inner_idx, inplace=True) new_mappings.update( pd.Series( thickets_cp[i] .dataframe["new_profiles"] .map(lambda x: (x, headers[i])) .values, - index=thickets_cp[i].dataframe["profile"], + index=thickets_cp[i].dataframe[inner_idx], ).to_dict() ) - thickets_cp[i].dataframe.drop("profile", axis=1, inplace=True) + thickets_cp[i].dataframe.drop(inner_idx, axis=1, inplace=True) thickets_cp[i].dataframe.set_index( "new_profiles", append=True, inplace=True ) @@ -198,18 +201,20 @@ def _handle_perfdata(): ) else: # Change second-level index to be from metadata's "metadata_key" column for i in range(len(thickets_cp)): - thickets_cp[i].metadata_column_to_perfdata(metadata_key) - thickets_cp[i].dataframe.reset_index(level="profile", inplace=True) + if metadata_key not in thickets_cp[i].dataframe.index.names: + thickets_cp[i].add_column_from_metadata_to_ensemble(metadata_key) + 
thickets_cp[i].dataframe.reset_index(level=inner_idx, inplace=True) new_mappings.update( pd.Series( thickets_cp[i] .dataframe[metadata_key] .map(lambda x: (x, headers[i])) .values, - index=thickets_cp[i].dataframe["profile"], + index=thickets_cp[i].dataframe[inner_idx], ).to_dict() ) - thickets_cp[i].dataframe.drop("profile", axis=1, inplace=True) + if inner_idx != metadata_key: + thickets_cp[i].dataframe.drop(inner_idx, axis=1, inplace=True) thickets_cp[i].dataframe.set_index( metadata_key, append=True, inplace=True ) @@ -266,11 +271,12 @@ def _handle_statsframe(): ), ) - # Step 0A: Pre-check of data structures - _check_structures() - # Step 0B: Variable Initialization + # Step 0A: Variable Initialization combined_th = thickets[0].deepcopy() thickets_cp = [th.deepcopy() for th in thickets] + inner_idx = thickets_cp[0].dataframe.index.names[1] + # Step 0B: Pre-check of data structures + _check_structures() # Step 1: Unify the thickets union_graph, _thickets = Ensemble._unify(thickets_cp) diff --git a/thicket/utils.py b/thicket/utils.py index 3f2c2435..5002bba4 100644 --- a/thicket/utils.py +++ b/thicket/utils.py @@ -54,8 +54,9 @@ def verify_sorted_profile(thicket_component): Arguments: thicket_component (DataFrame): component of thicket to check """ + profile_index_values = list( - OrderedDict.fromkeys(thicket_component.index.get_level_values("profile")) + OrderedDict.fromkeys(thicket_component.index.get_level_values(thicket_component.index.nlevels-1)) ) if profile_index_values != sorted(profile_index_values): raise ValueError( From 3159f2836174b66ed4d17078c80085e5d3bbc8f1 Mon Sep 17 00:00:00 2001 From: Michael Richard McKinsey Date: Fri, 8 Sep 2023 16:47:33 -0500 Subject: [PATCH 2/5] Black --- thicket/ensemble.py | 4 +++- thicket/utils.py | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/thicket/ensemble.py b/thicket/ensemble.py index 7f081f9e..0deb2252 100644 --- a/thicket/ensemble.py +++ b/thicket/ensemble.py @@ -202,7 +202,9 @@ def 
_handle_perfdata(): else: # Change second-level index to be from metadata's "metadata_key" column for i in range(len(thickets_cp)): if metadata_key not in thickets_cp[i].dataframe.index.names: - thickets_cp[i].add_column_from_metadata_to_ensemble(metadata_key) + thickets_cp[i].add_column_from_metadata_to_ensemble( + metadata_key + ) thickets_cp[i].dataframe.reset_index(level=inner_idx, inplace=True) new_mappings.update( pd.Series( diff --git a/thicket/utils.py b/thicket/utils.py index 5002bba4..0e01bad3 100644 --- a/thicket/utils.py +++ b/thicket/utils.py @@ -56,7 +56,11 @@ def verify_sorted_profile(thicket_component): """ profile_index_values = list( - OrderedDict.fromkeys(thicket_component.index.get_level_values(thicket_component.index.nlevels-1)) + OrderedDict.fromkeys( + thicket_component.index.get_level_values( + thicket_component.index.nlevels - 1 + ) # Innermost index + ) ) if profile_index_values != sorted(profile_index_values): raise ValueError( From 24b1ba3309695e7ddecda60d0c7dfc1e30ce5c8c Mon Sep 17 00:00:00 2001 From: Michael Richard McKinsey Date: Fri, 8 Sep 2023 16:55:07 -0500 Subject: [PATCH 3/5] Black --- thicket/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thicket/utils.py b/thicket/utils.py index 0e01bad3..caef166e 100644 --- a/thicket/utils.py +++ b/thicket/utils.py @@ -59,7 +59,7 @@ def verify_sorted_profile(thicket_component): OrderedDict.fromkeys( thicket_component.index.get_level_values( thicket_component.index.nlevels - 1 - ) # Innermost index + ) # Innermost index ) ) if profile_index_values != sorted(profile_index_values): From 2c85b60c74599f5d0f16bfd954cd638be77ecf0b Mon Sep 17 00:00:00 2001 From: Michael Richard McKinsey Date: Wed, 1 Nov 2023 13:34:39 -0500 Subject: [PATCH 4/5] Fix rebasing bug --- thicket/ensemble.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/thicket/ensemble.py b/thicket/ensemble.py index 0deb2252..1535c501 100644 --- a/thicket/ensemble.py +++ 
b/thicket/ensemble.py @@ -83,16 +83,15 @@ def _check_structures(): # Check for metadata_key in metadata if metadata_key: for th in thickets: - verify_thicket_structures(th.metadata, columns=[metadata_key]) + if metadata_key != th.metadata.index.name: + verify_thicket_structures(th.metadata, columns=[metadata_key]) # Check length of profiles match if metadata key is not provided if metadata_key is None: for i in range(len(thickets) - 1): if len(thickets[i].profile) != len(thickets[i + 1].profile): raise ValueError( f"Length of all thicket profiles must match if 'metadata_key' is not provided. {len(thickets[i].profile)} != {len(thickets[i + 1].profile)}" - ) - if metadata_key != th.metadata.index.name: - verify_thicket_structures(th.metadata, columns=[metadata_key]) + ) # Ensure all thickets profiles are sorted. Must be true when metadata_key=None to # guarantee performance data table and metadata table match up. if metadata_key is None: @@ -202,9 +201,7 @@ def _handle_perfdata(): else: # Change second-level index to be from metadata's "metadata_key" column for i in range(len(thickets_cp)): if metadata_key not in thickets_cp[i].dataframe.index.names: - thickets_cp[i].add_column_from_metadata_to_ensemble( - metadata_key - ) + thickets_cp[i].metadata_column_to_perfdata(metadata_key) thickets_cp[i].dataframe.reset_index(level=inner_idx, inplace=True) new_mappings.update( pd.Series( From f92e65add578b1918bf1f52198dc23997094272d Mon Sep 17 00:00:00 2001 From: Stephanie Labasan Brink Date: Fri, 22 Dec 2023 11:31:21 -0800 Subject: [PATCH 5/5] remove extra newline --- thicket/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/thicket/utils.py b/thicket/utils.py index caef166e..0fd09239 100644 --- a/thicket/utils.py +++ b/thicket/utils.py @@ -54,7 +54,6 @@ def verify_sorted_profile(thicket_component): Arguments: thicket_component (DataFrame): component of thicket to check """ - profile_index_values = list( OrderedDict.fromkeys( thicket_component.index.get_level_values(