diff --git a/setup.py b/setup.py index d02e7158..7c409cf5 100644 --- a/setup.py +++ b/setup.py @@ -46,6 +46,7 @@ def readme(): "numpy", "pandas >= 1.1", "llnl-hatchet", + "tqdm", ], extras_require={ "extrap": ["extrap", "matplotlib"], diff --git a/thicket/ensemble.py b/thicket/ensemble.py index 93f76cd6..3e229357 100644 --- a/thicket/ensemble.py +++ b/thicket/ensemble.py @@ -9,6 +9,7 @@ from hatchet import GraphFrame import numpy as np import pandas as pd +import tqdm import thicket.helpers as helpers from .utils import ( @@ -23,12 +24,13 @@ class Ensemble: """Operations pertaining to ensembling.""" @staticmethod - def _unify(thickets, inplace=False): + def _unify(thickets, inplace=False, disable_tqdm=False): """Create union graph from list of thickets and sync their DataFrames. Arguments: thickets (list): list of Thicket objects inplace (bool): whether to modify the original thicket objects or return new + disable_tqdm (bool): whether to disable tqdm progress bar Returns: (tuple): tuple containing: @@ -101,10 +103,12 @@ def _replace_graph_df_nodes(thickets, old_to_new, union_graph): # Unify graphs if "self" and "other" do not have the same graph union_graph = _thickets[0].graph old_to_new = {} - for i in range(len(_thickets) - 1): + pbar = tqdm.tqdm(range(len(_thickets) - 1), disable=disable_tqdm) + for i in pbar: + pbar.set_description("(2/2) Creating Thicket") temp_dict = {} union_graph = union_graph.union(_thickets[i + 1].graph, temp_dict) - # Update both graphs to the union graph + # Set all graphs to the union graph _thickets[i].graph = union_graph _thickets[i + 1].graph = union_graph # Merge the current old_to_new dictionary with the new mappings @@ -121,6 +125,7 @@ def _columns( thickets, headers=None, metadata_key=None, + disable_tqdm=False, ): """Concatenate Thicket attributes horizontally. For DataFrames, this implies expanding in the column direction. 
New column multi-index will be created with columns @@ -131,6 +136,7 @@ def _columns( metadata_key (str): Name of the column from the metadata tables to replace the 'profile' index. If no argument is provided, it is assumed that there is no profile-wise relationship between the thickets. + disable_tqdm (bool): whether to disable tqdm progress bar Returns: (Thicket): New ensembled Thicket object @@ -342,7 +348,9 @@ def _handle_statsframe(): _check_structures() # Step 1: Unify the thickets. Can be inplace since we are using copies already - union_graph, _thickets = Ensemble._unify(thickets_cp, inplace=True) + union_graph, _thickets = Ensemble._unify( + thickets_cp, inplace=True, disable_tqdm=disable_tqdm + ) combined_th.graph = union_graph thickets_cp = _thickets @@ -361,11 +369,13 @@ def _handle_statsframe(): return combined_th @staticmethod - def _index(thickets): + def _index(thickets, from_statsframes=False, disable_tqdm=False): """Unify a list of thickets into a single thicket Arguments: thickets (list): list of Thicket objects + from_statsframes (bool): Whether this method was invoked from from_statsframes + disable_tqdm (bool): whether to disable tqdm progress bar Returns: unify_graph (hatchet.Graph): unified graph, @@ -422,7 +432,7 @@ def _fill_perfdata(df, numerical_fill_value=np.nan): unify_profile_mapping = OrderedDict() # Unification - unify_graph, thickets = Ensemble._unify(thickets) + unify_graph, thickets = Ensemble._unify(thickets, disable_tqdm=disable_tqdm) for th in thickets: # Extend metrics unify_inc_metrics.extend(th.inc_metrics) diff --git a/thicket/groupby.py b/thicket/groupby.py index f5df0c8a..56b3c1ed 100644 --- a/thicket/groupby.py +++ b/thicket/groupby.py @@ -13,7 +13,7 @@ def __init__(self, by=None, *args, **kwargs): super(GroupBy, self).__init__(*args, **kwargs) self.by = by - def agg(self, func): + def agg(self, func, disable_tqdm=False): """Aggregate the Thickets' PerfData numerical columns in a GroupBy object. 
Arguments: @@ -28,7 +28,7 @@ def agg(self, func): values_list = list(agg_tks.values()) first_tk = values_list[0] # TODO: Hack to avoid circular import. - agg_tk = first_tk.concat_thickets(values_list) + agg_tk = first_tk.concat_thickets(values_list, disable_tqdm=disable_tqdm) return agg_tk diff --git a/thicket/tests/conftest.py b/thicket/tests/conftest.py index 67c09eae..dd536907 100644 --- a/thicket/tests/conftest.py +++ b/thicket/tests/conftest.py @@ -24,7 +24,7 @@ def thicket_axis_columns(rajaperf_cali_1trial): list: List of original thickets, list of deepcopies of original thickets, and column-joined thicket. """ - tk = Thicket.from_caliperreader(rajaperf_cali_1trial) + tk = Thicket.from_caliperreader(rajaperf_cali_1trial, disable_tqdm=True) gb = tk.groupby("tuning") @@ -38,6 +38,7 @@ def thicket_axis_columns(rajaperf_cali_1trial): axis="columns", headers=headers, metadata_key="ProblemSizeRunParam", + disable_tqdm=True, ) return thickets, thickets_cp, combined_th @@ -54,8 +55,12 @@ def stats_thicket_axis_columns(rajaperf_cuda_block128_1M_cali): list: List of original thickets, list of deepcopies of original thickets, and column-joined thicket. 
""" - th_cuda128_1 = Thicket.from_caliperreader(rajaperf_cuda_block128_1M_cali[0:4]) - th_cuda128_2 = Thicket.from_caliperreader(rajaperf_cuda_block128_1M_cali[5:9]) + th_cuda128_1 = Thicket.from_caliperreader( + rajaperf_cuda_block128_1M_cali[0:4], disable_tqdm=True + ) + th_cuda128_2 = Thicket.from_caliperreader( + rajaperf_cuda_block128_1M_cali[5:9], disable_tqdm=True + ) # To check later if modifications were unexpectedly made th_cuda128_1_deep = th_cuda128_1.deepcopy() @@ -67,6 +72,7 @@ def stats_thicket_axis_columns(rajaperf_cuda_block128_1M_cali): thickets=thickets, axis="columns", headers=["Cuda 1", "Cuda 2"], + disable_tqdm=True, ) return thickets, thickets_cp, combined_th diff --git a/thicket/tests/test_caliperreader.py b/thicket/tests/test_caliperreader.py index 2c5a5463..a9e49961 100644 --- a/thicket/tests/test_caliperreader.py +++ b/thicket/tests/test_caliperreader.py @@ -8,7 +8,7 @@ def test_from_caliperreader(rajaperf_seq_O3_1M_cali): """Sanity test a thicket object with known data.""" - tk = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali[0]) + tk = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali[0], disable_tqdm=True) # Check the object type assert isinstance(tk, Thicket) diff --git a/thicket/tests/test_concat_thickets.py b/thicket/tests/test_concat_thickets.py index 70b9cc25..132e86e0 100644 --- a/thicket/tests/test_concat_thickets.py +++ b/thicket/tests/test_concat_thickets.py @@ -18,10 +18,10 @@ def test_concat_thickets_index(mpi_scaling_cali): - th_27 = Thicket.from_caliperreader(mpi_scaling_cali[0]) - th_64 = Thicket.from_caliperreader(mpi_scaling_cali[1]) + th_27 = Thicket.from_caliperreader(mpi_scaling_cali[0], disable_tqdm=True) + th_64 = Thicket.from_caliperreader(mpi_scaling_cali[1], disable_tqdm=True) - tk = Thicket.concat_thickets([th_27, th_64]) + tk = Thicket.concat_thickets([th_27, th_64], disable_tqdm=True) # Check dataframe shape assert tk.dataframe.shape == (90, 7) diff --git a/thicket/tests/test_copy.py 
b/thicket/tests/test_copy.py index ff68f8f8..82fd0bbd 100644 --- a/thicket/tests/test_copy.py +++ b/thicket/tests/test_copy.py @@ -7,7 +7,7 @@ def test_copy(rajaperf_seq_O3_1M_cali): - self = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali[0]) + self = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali[0], disable_tqdm=True) self.exc_metrics.append("value") other = self.copy() @@ -68,7 +68,7 @@ def test_copy(rajaperf_seq_O3_1M_cali): def test_deepcopy(rajaperf_seq_O3_1M_cali): - self = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali[0]) + self = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali[0], disable_tqdm=True) self.exc_metrics.append("value") other = self.deepcopy() diff --git a/thicket/tests/test_display.py b/thicket/tests/test_display.py index 49f4b98d..0b5cc0f2 100644 --- a/thicket/tests/test_display.py +++ b/thicket/tests/test_display.py @@ -11,7 +11,7 @@ def test_display_histogram(rajaperf_seq_O3_1M_cali): - tk = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali) + tk = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True) node = pd.unique(tk.dataframe.reset_index()["node"])[4] @@ -79,7 +79,7 @@ def test_display_histogram_columnar_join(thicket_axis_columns): def test_display_heatmap(rajaperf_seq_O3_1M_cali): - tk = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali) + tk = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True) th.stats.variance(tk, columns=["Min time/rank"]) @@ -152,7 +152,7 @@ def test_display_heatmap_columnar_join(thicket_axis_columns): def test_display_boxplot(rajaperf_seq_O3_1M_cali): - tk = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali) + tk = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True) nodes = list(pd.unique(tk.dataframe.reset_index()["node"])[0:2]) diff --git a/thicket/tests/test_ensemble.py b/thicket/tests/test_ensemble.py index 806a1bd5..14f5e203 100644 --- a/thicket/tests/test_ensemble.py +++ b/thicket/tests/test_ensemble.py @@ -9,7 +9,7 
@@ def test_unify(literal_thickets): tk, tk2, tk3 = literal_thickets - union_graph, _thickets = Ensemble._unify([tk, tk2, tk3]) + union_graph, _thickets = Ensemble._unify([tk, tk2, tk3], disable_tqdm=True) ug_hashes = [0, 1, 2, 3, 4, 5, 6] tk_hashes = [ diff --git a/thicket/tests/test_filter_metadata.py b/thicket/tests/test_filter_metadata.py index 925b9df8..49ae8178 100644 --- a/thicket/tests/test_filter_metadata.py +++ b/thicket/tests/test_filter_metadata.py @@ -172,7 +172,7 @@ def filter_multiple_or(th, columns_values): def test_filter_metadata(rajaperf_seq_O3_1M_cali): # example thicket - th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali) + th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True) # columns and corresponding values to filter by columns_values = {"ProblemSizeRunParam": ["30"], "cluster": ["chekov", "quartz"]} filter_one_column(th, columns_values) diff --git a/thicket/tests/test_filter_stats.py b/thicket/tests/test_filter_stats.py index 91b2242d..1e3acc92 100644 --- a/thicket/tests/test_filter_stats.py +++ b/thicket/tests/test_filter_stats.py @@ -52,7 +52,7 @@ def check_filter_stats(th, columns_values): def test_filter_stats(rajaperf_seq_O3_1M_cali): # example thicket - th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali) + th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True) # columns and corresponding values to filter by columns_values = { "test_string_column": ["less than 20"], diff --git a/thicket/tests/test_from_statsframes.py b/thicket/tests/test_from_statsframes.py index fb57308b..297da20b 100644 --- a/thicket/tests/test_from_statsframes.py +++ b/thicket/tests/test_from_statsframes.py @@ -12,7 +12,7 @@ def test_single_trial(mpi_scaling_cali): th_list = [] for file in mpi_scaling_cali: - th_list.append(th.Thicket.from_caliperreader(file)) + th_list.append(th.Thicket.from_caliperreader(file, disable_tqdm=True)) # Add arbitrary value to aggregated statistics table t_val = 0 @@ -20,7 +20,7 @@ 
def test_single_trial(mpi_scaling_cali): t.statsframe.dataframe["test"] = t_val t_val += 2 - tk = th.Thicket.from_statsframes(th_list) + tk = th.Thicket.from_statsframes(th_list, disable_tqdm=True) # Check level values assert set(tk.dataframe.index.get_level_values("profile")) == { @@ -33,7 +33,9 @@ def test_single_trial(mpi_scaling_cali): # Check performance data table values assert set(tk.dataframe["test"]) == {0, 2, 4, 6, 8} - tk_named = th.Thicket.from_statsframes(th_list, metadata_key="mpi.world.size") + tk_named = th.Thicket.from_statsframes( + th_list, metadata_key="mpi.world.size", disable_tqdm=True + ) # Check level values assert set(tk_named.dataframe.index.get_level_values("mpi.world.size")) == { diff --git a/thicket/tests/test_groupby.py b/thicket/tests/test_groupby.py index 1f571d6a..ebe208b2 100644 --- a/thicket/tests/test_groupby.py +++ b/thicket/tests/test_groupby.py @@ -82,7 +82,7 @@ def check_groupby(th, columns_values): def test_aggregate(rajaperf_cuda_block128_1M_cali): - tk = Thicket.from_caliperreader(rajaperf_cuda_block128_1M_cali) + tk = Thicket.from_caliperreader(rajaperf_cuda_block128_1M_cali, disable_tqdm=True) gb = tk.groupby("spot.format.version") epsilon = 0.000001 @@ -111,15 +111,18 @@ def _check_values(_tk_agg): < epsilon ) - tk_agg = gb.agg(func={"Min time/rank": [np.mean, np.var], "Total time": np.mean}) + tk_agg = gb.agg( + func={"Min time/rank": [np.mean, np.var], "Total time": np.mean}, + disable_tqdm=True, + ) _check_values(tk_agg) - tk_agg = gb.agg(func=[np.mean, np.var]) + tk_agg = gb.agg(func=[np.mean, np.var], disable_tqdm=True) _check_values(tk_agg) def test_groupby(rajaperf_seq_O3_1M_cali): # example thicket - th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali) + th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True) # use cases for string, numeric, and single value columns columns_values = ["user", "launchdate", "cali.channel"] @@ -129,7 +132,7 @@ def test_groupby(rajaperf_seq_O3_1M_cali): def 
test_groupby_concat_thickets_columns(rajaperf_seq_O3_1M_cali): """Tests case where the Sub-Thickets of a groupby are used in a columnar join""" # example thicket - th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali) + th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True) # Creates four Sub-Thickets column = "unique_col" @@ -149,6 +152,7 @@ def test_groupby_concat_thickets_columns(rajaperf_seq_O3_1M_cali): thickets=thickets, axis="columns", metadata_key=selected_column, + disable_tqdm=True, ) test_concat_thickets_columns((thickets, thickets_cp, combined_th)) @@ -157,7 +161,7 @@ def test_groupby_concat_thickets_columns(rajaperf_seq_O3_1M_cali): def test_groupby_concat_thickets_columns_subthickets(rajaperf_seq_O3_1M_cali): """Tests case where some specific Sub-Thickets of a groupby are used in a columnar join""" # example thicket - th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali) + th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True) # Creates four Sub-Thickets column = "unique_col" @@ -180,6 +184,7 @@ def test_groupby_concat_thickets_columns_subthickets(rajaperf_seq_O3_1M_cali): thickets=thickets, axis="columns", metadata_key=selected_column, + disable_tqdm=True, ) test_concat_thickets_columns((thickets, thickets_cp, combined_th)) diff --git a/thicket/tests/test_intersection.py b/thicket/tests/test_intersection.py index 4f6d71e5..6820f079 100644 --- a/thicket/tests/test_intersection.py +++ b/thicket/tests/test_intersection.py @@ -8,12 +8,12 @@ def test_intersection(rajaperf_cali_1trial): - tk = th.from_caliperreader(rajaperf_cali_1trial) + tk = th.from_caliperreader(rajaperf_cali_1trial, disable_tqdm=True) intersected_tk = tk.intersection() intersected_tk_other = th.from_caliperreader( - rajaperf_cali_1trial, intersection=True + rajaperf_cali_1trial, intersection=True, disable_tqdm=True ) # Check other methodology diff --git a/thicket/tests/test_model_extrap.py b/thicket/tests/test_model_extrap.py index 
249752ea..a925d610 100644 --- a/thicket/tests/test_model_extrap.py +++ b/thicket/tests/test_model_extrap.py @@ -17,7 +17,7 @@ def test_model_extrap(mpi_scaling_cali): from thicket.model_extrap import Modeling - t_ens = Thicket.from_caliperreader(mpi_scaling_cali) + t_ens = Thicket.from_caliperreader(mpi_scaling_cali, disable_tqdm=True) # Method 1: Model created using metadata column mdl = Modeling( @@ -62,7 +62,7 @@ def test_model_extrap(mpi_scaling_cali): def test_componentize_functions(mpi_scaling_cali): from thicket.model_extrap import Modeling - t_ens = Thicket.from_caliperreader(mpi_scaling_cali) + t_ens = Thicket.from_caliperreader(mpi_scaling_cali, disable_tqdm=True) mdl = Modeling( t_ens, diff --git a/thicket/tests/test_query.py b/thicket/tests/test_query.py index 1bf1f120..8e368f4e 100644 --- a/thicket/tests/test_query.py +++ b/thicket/tests/test_query.py @@ -53,7 +53,7 @@ def check_query(th, hnids, query): def test_query(rajaperf_cuda_block128_1M_cali): # test thicket - th = Thicket.from_caliperreader(rajaperf_cuda_block128_1M_cali) + th = Thicket.from_caliperreader(rajaperf_cuda_block128_1M_cali, disable_tqdm=True) # test arguments hnids = [0, 1, 2, 3, 4] # 5, 6, 7 have Nones query = ( diff --git a/thicket/tests/test_stats.py b/thicket/tests/test_stats.py index 9c0ee4f4..3f6a9f9b 100644 --- a/thicket/tests/test_stats.py +++ b/thicket/tests/test_stats.py @@ -11,7 +11,7 @@ def test_mean(rajaperf_seq_O3_1M_cali): - th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali) + th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True) assert sorted(th_ens.dataframe.index.get_level_values(0).unique()) == sorted( th_ens.statsframe.dataframe.index.values @@ -49,7 +49,7 @@ def test_mean_columnar_join(thicket_axis_columns): def test_median(rajaperf_seq_O3_1M_cali): - th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali) + th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True) assert 
sorted(th_ens.dataframe.index.get_level_values(0).unique()) == sorted( th_ens.statsframe.dataframe.index.values @@ -87,7 +87,7 @@ def test_median_columnar_join(thicket_axis_columns): def test_minimum(rajaperf_seq_O3_1M_cali): - th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali) + th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True) assert sorted(th_ens.dataframe.index.get_level_values(0).unique()) == sorted( th_ens.statsframe.dataframe.index.values @@ -125,7 +125,7 @@ def test_minimum_columnar_join(thicket_axis_columns): def test_maximum(rajaperf_seq_O3_1M_cali): - th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali) + th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True) assert sorted(th_ens.dataframe.index.get_level_values(0).unique()) == sorted( th_ens.statsframe.dataframe.index.values @@ -163,7 +163,7 @@ def test_maximum_columnar_join(thicket_axis_columns): def test_std(rajaperf_seq_O3_1M_cali): - th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali) + th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True) assert sorted(th_ens.dataframe.index.get_level_values(0).unique()) == sorted( th_ens.statsframe.dataframe.index.values @@ -201,7 +201,7 @@ def test_std_columnar_join(thicket_axis_columns): def test_percentiles(rajaperf_seq_O3_1M_cali): - th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali) + th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True) assert sorted(th_ens.dataframe.index.get_level_values(0).unique()) == sorted( th_ens.statsframe.dataframe.index.values @@ -232,7 +232,7 @@ def test_percentiles(rajaperf_seq_O3_1M_cali): def test_percentiles_none(rajaperf_seq_O3_1M_cali): - th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali) + th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True) th.stats.percentiles(th_ens, columns=["Min time/rank"], percentiles=None) @@ 
-242,7 +242,7 @@ def test_percentiles_none(rajaperf_seq_O3_1M_cali): def test_percentiles_single_value(rajaperf_seq_O3_1M_cali): - th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali) + th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True) th.stats.percentiles(th_ens, columns=["Min time/rank"], percentiles=[0.3]) @@ -366,7 +366,7 @@ def test_percentiles_columnar_join(thicket_axis_columns): def test_variance(rajaperf_seq_O3_1M_cali): - th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali) + th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True) assert sorted(th_ens.dataframe.index.get_level_values(0).unique()) == sorted( th_ens.statsframe.dataframe.index.values @@ -404,7 +404,9 @@ def test_variance_columnar_join(thicket_axis_columns): def test_normality(rajaperf_cuda_block128_1M_cali): - th_ens = th.Thicket.from_caliperreader(rajaperf_cuda_block128_1M_cali) + th_ens = th.Thicket.from_caliperreader( + rajaperf_cuda_block128_1M_cali, disable_tqdm=True + ) assert sorted(th_ens.dataframe.index.get_level_values(0).unique()) == sorted( th_ens.statsframe.dataframe.index.values @@ -459,7 +461,9 @@ def test_normality_columnar_join(thicket_axis_columns, stats_thicket_axis_column def test_correlation(rajaperf_cuda_block128_1M_cali): - th_ens = th.Thicket.from_caliperreader(rajaperf_cuda_block128_1M_cali) + th_ens = th.Thicket.from_caliperreader( + rajaperf_cuda_block128_1M_cali, disable_tqdm=True + ) assert sorted(th_ens.dataframe.index.get_level_values(0).unique()) == sorted( th_ens.statsframe.dataframe.index.values @@ -499,7 +503,7 @@ def test_correlation_columnar_join(thicket_axis_columns): def test_boxplot(rajaperf_seq_O3_1M_cali): - th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali) + th_ens = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True) assert sorted(th_ens.dataframe.index.get_level_values(0).unique()) == sorted( th_ens.statsframe.dataframe.index.values 
diff --git a/thicket/tests/test_thicket.py b/thicket/tests/test_thicket.py index 2be23d17..65a0a748 100644 --- a/thicket/tests/test_thicket.py +++ b/thicket/tests/test_thicket.py @@ -50,16 +50,16 @@ def test_resolve_missing_indicies(): def test_statsframe(rajaperf_seq_O3_1M_cali): def _test_multiindex(): """Test statsframe when headers are multiindexed.""" - th1 = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali[0]) - th2 = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali[1]) - th_cj = Thicket.concat_thickets([th1, th2], axis="columns") + th1 = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali[0], disable_tqdm=True) + th2 = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali[1], disable_tqdm=True) + th_cj = Thicket.concat_thickets([th1, th2], axis="columns", disable_tqdm=True) # Check column format assert ("name", "") in th_cj.statsframe.dataframe.columns _test_multiindex() - th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali[-1]) + th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali[-1], disable_tqdm=True) # Arbitrary value insertion in aggregated statistics table. 
th.statsframe.dataframe["test"] = 1 @@ -81,7 +81,7 @@ def _test_multiindex(): def test_metadata_column_to_perfdata(mpi_scaling_cali): - t_ens = Thicket.from_caliperreader(mpi_scaling_cali) + t_ens = Thicket.from_caliperreader(mpi_scaling_cali, disable_tqdm=True) example_column = "jobsize" example_column_metrics = [27, 64, 125, 216, 343] @@ -122,7 +122,9 @@ def test_thicketize_graphframe(rajaperf_seq_O3_1M_cali): def test_unique_metadata_base_cuda(rajaperf_cuda_block128_1M_cali): - t_ens = Thicket.from_caliperreader(rajaperf_cuda_block128_1M_cali) + t_ens = Thicket.from_caliperreader( + rajaperf_cuda_block128_1M_cali, disable_tqdm=True + ) res = t_ens.get_unique_metadata() assert res["systype_build"] == ["blueos_3_ppc64le_ib_p9"] diff --git a/thicket/thicket.py b/thicket/thicket.py index 75f5ce8d..bec42f9e 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -15,6 +15,7 @@ import numpy as np from hatchet import GraphFrame from hatchet.query import AbstractQuery, QueryMatcher +import tqdm from thicket.ensemble import Ensemble import thicket.helpers as helpers @@ -183,7 +184,9 @@ def thicketize_graphframe(gf, prf): return th @staticmethod - def from_caliper(filename_or_stream, query=None, intersection=False): + def from_caliper( + filename_or_stream, query=None, intersection=False, disable_tqdm=False + ): """Read in a Caliper .cali or .json file. 
Arguments: @@ -191,38 +194,50 @@ def from_caliper(filename_or_stream, query=None, intersection=False): `.cali` or JSON-split format, or an open file object to read one query (str): cali-query in CalQL format intersection (bool): whether to perform intersection or union (default) + disable_tqdm (bool): whether to disable tqdm progress bar """ return Thicket.reader_dispatch( - GraphFrame.from_caliper, intersection, filename_or_stream, query + GraphFrame.from_caliper, + intersection, + disable_tqdm, + filename_or_stream, + query, ) @staticmethod - def from_hpctoolkit(dirname, intersection=False): + def from_hpctoolkit(dirname, intersection=False, disable_tqdm=False): """Create a GraphFrame using hatchet's HPCToolkit reader and use its attributes to make a new thicket. Arguments: dirname (str): parent directory of an HPCToolkit experiment.xml file intersection (bool): whether to perform intersection or union (default) + disable_tqdm (bool): whether to disable tqdm progress bar Returns: (thicket): new thicket containing HPCToolkit profile data """ return Thicket.reader_dispatch( - GraphFrame.from_hpctoolkit, intersection, dirname + GraphFrame.from_hpctoolkit, intersection, disable_tqdm, dirname ) @staticmethod - def from_caliperreader(filename_or_caliperreader, intersection=False): + def from_caliperreader( + filename_or_caliperreader, intersection=False, disable_tqdm=False + ): """Helper function to read one caliper file. 
Arguments: filename_or_caliperreader (str or CaliperReader): name of a Caliper output file in `.cali` format, or a CaliperReader object intersection (bool): whether to perform intersection or union (default) + disable_tqdm (bool): whether to disable tqdm progress bar """ return Thicket.reader_dispatch( - GraphFrame.from_caliperreader, intersection, filename_or_caliperreader + GraphFrame.from_caliperreader, + intersection, + disable_tqdm, + filename_or_caliperreader, ) @staticmethod @@ -262,12 +277,13 @@ def from_literal(graph_dict): return tk @staticmethod - def reader_dispatch(func, intersection=False, *args, **kwargs): + def reader_dispatch(func, intersection, disable_tqdm, *args, **kwargs): """Create a thicket from a list, directory of files, or a single file. Arguments: func (function): reader function to be used intersection (bool): whether to perform intersection or union (default). + disable_tqdm (bool): whether to disable tqdm progress bar args (list): list of args; args[0] should be an object that can be read from """ ens_list = [] @@ -275,11 +291,14 @@ def reader_dispatch(func, intersection=False, *args, **kwargs): extra_args = [] if len(args) > 1: extra_args = args[1:] + pbar_desc = "(1/2) Reading Files" # Parse the input object # if a list of files if isinstance(obj, (list, tuple)): - for file in obj: + pbar = tqdm.tqdm(obj, disable=disable_tqdm) + for file in pbar: + pbar.set_description(pbar_desc) ens_list.append( Thicket.thicketize_graphframe( func(file, *extra_args, **kwargs), file @@ -287,7 +306,9 @@ def reader_dispatch(func, intersection=False, *args, **kwargs): ) # if directory of files elif os.path.isdir(obj): - for file in os.listdir(obj): + pbar = tqdm.tqdm(os.listdir(obj), disable=disable_tqdm) + for file in pbar: + pbar.set_description(pbar_desc) f = os.path.join(obj, file) ens_list.append( Thicket.thicketize_graphframe(func(f, *extra_args, **kwargs), f) @@ -314,12 +335,15 @@ def reader_dispatch(func, intersection=False, *args, **kwargs): 
thickets=ens_list, axis="index", calltree=calltree, + disable_tqdm=disable_tqdm, ) return thicket_object @staticmethod - def concat_thickets(thickets, axis="index", calltree="union", **kwargs): + def concat_thickets( + thickets, axis="index", calltree="union", disable_tqdm=False, **kwargs + ): """Concatenate thickets together on index or columns. The calltree can either be unioned or intersected which will affect the other structures. @@ -339,8 +363,12 @@ def concat_thickets(thickets, axis="index", calltree="union", **kwargs): (thicket): concatenated thicket """ - def _index(thickets): - thicket_parts = Ensemble._index(thickets=thickets) + def _index(thickets, from_statsframes=False, disable_tqdm=disable_tqdm): + thicket_parts = Ensemble._index( + thickets=thickets, + from_statsframes=from_statsframes, + disable_tqdm=disable_tqdm, + ) return Thicket( graph=thicket_parts[0], @@ -352,9 +380,14 @@ def _index(thickets): profile_mapping=thicket_parts[6], ) - def _columns(thickets, headers=None, metadata_key=None): + def _columns( + thickets, headers=None, metadata_key=None, disable_tqdm=disable_tqdm + ): combined_thicket = Ensemble._columns( - thickets=thickets, headers=headers, metadata_key=metadata_key + thickets=thickets, + headers=headers, + metadata_key=metadata_key, + disable_tqdm=disable_tqdm, ) return combined_thicket @@ -696,7 +729,7 @@ def tree( ) @staticmethod - def from_statsframes(tk_list, metadata_key=None): + def from_statsframes(tk_list, metadata_key=None, disable_tqdm=False): """Compose a list of Thickets with data in their statsframes. The Thicket's individual aggregated statistics tables are ensembled and become the @@ -784,7 +817,9 @@ def _agg_to_set(obj): # Append copy to list tk_copy_list.append(tk_copy) - return Thicket.concat_thickets(tk_copy_list) + return Thicket.concat_thickets( + tk_copy_list, from_statsframes=True, disable_tqdm=disable_tqdm + ) def to_json(self, ensemble=True, metadata=True, stats=True): jsonified_thicket = {}