Skip to content

Commit

Permalink
Add Progress Bar to Reader (#131)
Browse files Browse the repository at this point in the history
* Add option to disable tqdm output

* Disable progress bar manually in tests
  • Loading branch information
michaelmckinsey1 authored Apr 8, 2024
1 parent 292135c commit 24c1ed5
Show file tree
Hide file tree
Showing 19 changed files with 136 additions and 71 deletions.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def readme():
"numpy",
"pandas >= 1.1",
"llnl-hatchet",
"tqdm",
],
extras_require={
"extrap": ["extrap", "matplotlib"],
Expand Down
22 changes: 16 additions & 6 deletions thicket/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from hatchet import GraphFrame
import numpy as np
import pandas as pd
import tqdm

import thicket.helpers as helpers
from .utils import (
Expand All @@ -23,12 +24,13 @@ class Ensemble:
"""Operations pertaining to ensembling."""

@staticmethod
def _unify(thickets, inplace=False):
def _unify(thickets, inplace=False, disable_tqdm=False):
"""Create union graph from list of thickets and sync their DataFrames.
Arguments:
thickets (list): list of Thicket objects
inplace (bool): whether to modify the original thicket objects in place or return new ones
disable_tqdm (bool): whether to disable tqdm progress bar
Returns:
(tuple): tuple containing:
Expand Down Expand Up @@ -101,10 +103,12 @@ def _replace_graph_df_nodes(thickets, old_to_new, union_graph):
# Unify graphs if "self" and "other" do not have the same graph
union_graph = _thickets[0].graph
old_to_new = {}
for i in range(len(_thickets) - 1):
pbar = tqdm.tqdm(range(len(_thickets) - 1), disable=disable_tqdm)
for i in pbar:
pbar.set_description("(2/2) Creating Thicket")
temp_dict = {}
union_graph = union_graph.union(_thickets[i + 1].graph, temp_dict)
# Update both graphs to the union graph
# Set all graphs to the union graph
_thickets[i].graph = union_graph
_thickets[i + 1].graph = union_graph
# Merge the current old_to_new dictionary with the new mappings
Expand All @@ -121,6 +125,7 @@ def _columns(
thickets,
headers=None,
metadata_key=None,
disable_tqdm=False,
):
"""Concatenate Thicket attributes horizontally. For DataFrames, this implies expanding
in the column direction. New column multi-index will be created with columns
Expand All @@ -131,6 +136,7 @@ def _columns(
metadata_key (str): Name of the column from the metadata tables to replace the 'profile'
index. If no argument is provided, it is assumed that there is no profile-wise
relationship between the thickets.
disable_tqdm (bool): whether to disable tqdm progress bar
Returns:
(Thicket): New ensembled Thicket object
Expand Down Expand Up @@ -342,7 +348,9 @@ def _handle_statsframe():
_check_structures()

# Step 1: Unify the thickets. Can be inplace since we are using copies already
union_graph, _thickets = Ensemble._unify(thickets_cp, inplace=True)
union_graph, _thickets = Ensemble._unify(
thickets_cp, inplace=True, disable_tqdm=disable_tqdm
)
combined_th.graph = union_graph
thickets_cp = _thickets

Expand All @@ -361,11 +369,13 @@ def _handle_statsframe():
return combined_th

@staticmethod
def _index(thickets):
def _index(thickets, from_statsframes=False, disable_tqdm=False):
"""Unify a list of thickets into a single thicket
Arguments:
thickets (list): list of Thicket objects
from_statsframes (bool): whether this method was invoked from Thicket.from_statsframes
disable_tqdm (bool): whether to disable tqdm progress bar
Returns:
unify_graph (hatchet.Graph): unified graph,
Expand Down Expand Up @@ -422,7 +432,7 @@ def _fill_perfdata(df, numerical_fill_value=np.nan):
unify_profile_mapping = OrderedDict()

# Unification
unify_graph, thickets = Ensemble._unify(thickets)
unify_graph, thickets = Ensemble._unify(thickets, disable_tqdm=disable_tqdm)
for th in thickets:
# Extend metrics
unify_inc_metrics.extend(th.inc_metrics)
Expand Down
4 changes: 2 additions & 2 deletions thicket/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def __init__(self, by=None, *args, **kwargs):
super(GroupBy, self).__init__(*args, **kwargs)
self.by = by

def agg(self, func):
def agg(self, func, disable_tqdm=False):
"""Aggregate the Thickets' PerfData numerical columns in a GroupBy object.
Arguments:
Expand All @@ -28,7 +28,7 @@ def agg(self, func):

values_list = list(agg_tks.values())
first_tk = values_list[0] # TODO: Hack to avoid circular import.
agg_tk = first_tk.concat_thickets(values_list)
agg_tk = first_tk.concat_thickets(values_list, disable_tqdm=disable_tqdm)

return agg_tk

Expand Down
12 changes: 9 additions & 3 deletions thicket/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def thicket_axis_columns(rajaperf_cali_1trial):
list: List of original thickets, list of deepcopies of original thickets, and
column-joined thicket.
"""
tk = Thicket.from_caliperreader(rajaperf_cali_1trial)
tk = Thicket.from_caliperreader(rajaperf_cali_1trial, disable_tqdm=True)

gb = tk.groupby("tuning")

Expand All @@ -38,6 +38,7 @@ def thicket_axis_columns(rajaperf_cali_1trial):
axis="columns",
headers=headers,
metadata_key="ProblemSizeRunParam",
disable_tqdm=True,
)

return thickets, thickets_cp, combined_th
Expand All @@ -54,8 +55,12 @@ def stats_thicket_axis_columns(rajaperf_cuda_block128_1M_cali):
list: List of original thickets, list of deepcopies of original thickets, and
column-joined thicket.
"""
th_cuda128_1 = Thicket.from_caliperreader(rajaperf_cuda_block128_1M_cali[0:4])
th_cuda128_2 = Thicket.from_caliperreader(rajaperf_cuda_block128_1M_cali[5:9])
th_cuda128_1 = Thicket.from_caliperreader(
rajaperf_cuda_block128_1M_cali[0:4], disable_tqdm=True
)
th_cuda128_2 = Thicket.from_caliperreader(
rajaperf_cuda_block128_1M_cali[5:9], disable_tqdm=True
)

# To check later if modifications were unexpectedly made
th_cuda128_1_deep = th_cuda128_1.deepcopy()
Expand All @@ -67,6 +72,7 @@ def stats_thicket_axis_columns(rajaperf_cuda_block128_1M_cali):
thickets=thickets,
axis="columns",
headers=["Cuda 1", "Cuda 2"],
disable_tqdm=True,
)

return thickets, thickets_cp, combined_th
Expand Down
2 changes: 1 addition & 1 deletion thicket/tests/test_caliperreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

def test_from_caliperreader(rajaperf_seq_O3_1M_cali):
"""Sanity test a thicket object with known data."""
tk = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali[0])
tk = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali[0], disable_tqdm=True)

# Check the object type
assert isinstance(tk, Thicket)
Expand Down
6 changes: 3 additions & 3 deletions thicket/tests/test_concat_thickets.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@


def test_concat_thickets_index(mpi_scaling_cali):
th_27 = Thicket.from_caliperreader(mpi_scaling_cali[0])
th_64 = Thicket.from_caliperreader(mpi_scaling_cali[1])
th_27 = Thicket.from_caliperreader(mpi_scaling_cali[0], disable_tqdm=True)
th_64 = Thicket.from_caliperreader(mpi_scaling_cali[1], disable_tqdm=True)

tk = Thicket.concat_thickets([th_27, th_64])
tk = Thicket.concat_thickets([th_27, th_64], disable_tqdm=True)

# Check dataframe shape
assert tk.dataframe.shape == (90, 7)
Expand Down
4 changes: 2 additions & 2 deletions thicket/tests/test_copy.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


def test_copy(rajaperf_seq_O3_1M_cali):
self = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali[0])
self = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali[0], disable_tqdm=True)
self.exc_metrics.append("value")
other = self.copy()

Expand Down Expand Up @@ -68,7 +68,7 @@ def test_copy(rajaperf_seq_O3_1M_cali):


def test_deepcopy(rajaperf_seq_O3_1M_cali):
self = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali[0])
self = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali[0], disable_tqdm=True)
self.exc_metrics.append("value")
other = self.deepcopy()

Expand Down
6 changes: 3 additions & 3 deletions thicket/tests/test_display.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


def test_display_histogram(rajaperf_seq_O3_1M_cali):
tk = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali)
tk = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True)

node = pd.unique(tk.dataframe.reset_index()["node"])[4]

Expand Down Expand Up @@ -79,7 +79,7 @@ def test_display_histogram_columnar_join(thicket_axis_columns):


def test_display_heatmap(rajaperf_seq_O3_1M_cali):
tk = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali)
tk = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True)

th.stats.variance(tk, columns=["Min time/rank"])

Expand Down Expand Up @@ -152,7 +152,7 @@ def test_display_heatmap_columnar_join(thicket_axis_columns):


def test_display_boxplot(rajaperf_seq_O3_1M_cali):
tk = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali)
tk = th.Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True)

nodes = list(pd.unique(tk.dataframe.reset_index()["node"])[0:2])

Expand Down
2 changes: 1 addition & 1 deletion thicket/tests/test_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
def test_unify(literal_thickets):
tk, tk2, tk3 = literal_thickets

union_graph, _thickets = Ensemble._unify([tk, tk2, tk3])
union_graph, _thickets = Ensemble._unify([tk, tk2, tk3], disable_tqdm=True)

ug_hashes = [0, 1, 2, 3, 4, 5, 6]
tk_hashes = [
Expand Down
2 changes: 1 addition & 1 deletion thicket/tests/test_filter_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def filter_multiple_or(th, columns_values):

def test_filter_metadata(rajaperf_seq_O3_1M_cali):
# example thicket
th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali)
th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True)
# columns and corresponding values to filter by
columns_values = {"ProblemSizeRunParam": ["30"], "cluster": ["chekov", "quartz"]}
filter_one_column(th, columns_values)
Expand Down
2 changes: 1 addition & 1 deletion thicket/tests/test_filter_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def check_filter_stats(th, columns_values):

def test_filter_stats(rajaperf_seq_O3_1M_cali):
# example thicket
th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali)
th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True)
# columns and corresponding values to filter by
columns_values = {
"test_string_column": ["less than 20"],
Expand Down
8 changes: 5 additions & 3 deletions thicket/tests/test_from_statsframes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@
def test_single_trial(mpi_scaling_cali):
th_list = []
for file in mpi_scaling_cali:
th_list.append(th.Thicket.from_caliperreader(file))
th_list.append(th.Thicket.from_caliperreader(file, disable_tqdm=True))

# Add arbitrary value to aggregated statistics table
t_val = 0
for t in th_list:
t.statsframe.dataframe["test"] = t_val
t_val += 2

tk = th.Thicket.from_statsframes(th_list)
tk = th.Thicket.from_statsframes(th_list, disable_tqdm=True)

# Check level values
assert set(tk.dataframe.index.get_level_values("profile")) == {
Expand All @@ -33,7 +33,9 @@ def test_single_trial(mpi_scaling_cali):
# Check performance data table values
assert set(tk.dataframe["test"]) == {0, 2, 4, 6, 8}

tk_named = th.Thicket.from_statsframes(th_list, metadata_key="mpi.world.size")
tk_named = th.Thicket.from_statsframes(
th_list, metadata_key="mpi.world.size", disable_tqdm=True
)

# Check level values
assert set(tk_named.dataframe.index.get_level_values("mpi.world.size")) == {
Expand Down
17 changes: 11 additions & 6 deletions thicket/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def check_groupby(th, columns_values):


def test_aggregate(rajaperf_cuda_block128_1M_cali):
tk = Thicket.from_caliperreader(rajaperf_cuda_block128_1M_cali)
tk = Thicket.from_caliperreader(rajaperf_cuda_block128_1M_cali, disable_tqdm=True)
gb = tk.groupby("spot.format.version")

epsilon = 0.000001
Expand Down Expand Up @@ -111,15 +111,18 @@ def _check_values(_tk_agg):
< epsilon
)

tk_agg = gb.agg(func={"Min time/rank": [np.mean, np.var], "Total time": np.mean})
tk_agg = gb.agg(
func={"Min time/rank": [np.mean, np.var], "Total time": np.mean},
disable_tqdm=True,
)
_check_values(tk_agg)
tk_agg = gb.agg(func=[np.mean, np.var])
tk_agg = gb.agg(func=[np.mean, np.var], disable_tqdm=True)
_check_values(tk_agg)


def test_groupby(rajaperf_seq_O3_1M_cali):
# example thicket
th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali)
th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True)
# use cases for string, numeric, and single value columns
columns_values = ["user", "launchdate", "cali.channel"]

Expand All @@ -129,7 +132,7 @@ def test_groupby(rajaperf_seq_O3_1M_cali):
def test_groupby_concat_thickets_columns(rajaperf_seq_O3_1M_cali):
"""Tests case where the Sub-Thickets of a groupby are used in a columnar join"""
# example thicket
th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali)
th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True)

# Creates four Sub-Thickets
column = "unique_col"
Expand All @@ -149,6 +152,7 @@ def test_groupby_concat_thickets_columns(rajaperf_seq_O3_1M_cali):
thickets=thickets,
axis="columns",
metadata_key=selected_column,
disable_tqdm=True,
)

test_concat_thickets_columns((thickets, thickets_cp, combined_th))
Expand All @@ -157,7 +161,7 @@ def test_groupby_concat_thickets_columns(rajaperf_seq_O3_1M_cali):
def test_groupby_concat_thickets_columns_subthickets(rajaperf_seq_O3_1M_cali):
"""Tests case where some specific Sub-Thickets of a groupby are used in a columnar join"""
# example thicket
th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali)
th = Thicket.from_caliperreader(rajaperf_seq_O3_1M_cali, disable_tqdm=True)

# Creates four Sub-Thickets
column = "unique_col"
Expand All @@ -180,6 +184,7 @@ def test_groupby_concat_thickets_columns_subthickets(rajaperf_seq_O3_1M_cali):
thickets=thickets,
axis="columns",
metadata_key=selected_column,
disable_tqdm=True,
)

test_concat_thickets_columns((thickets, thickets_cp, combined_th))
4 changes: 2 additions & 2 deletions thicket/tests/test_intersection.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@


def test_intersection(rajaperf_cali_1trial):
tk = th.from_caliperreader(rajaperf_cali_1trial)
tk = th.from_caliperreader(rajaperf_cali_1trial, disable_tqdm=True)

intersected_tk = tk.intersection()

intersected_tk_other = th.from_caliperreader(
rajaperf_cali_1trial, intersection=True
rajaperf_cali_1trial, intersection=True, disable_tqdm=True
)

# Check other methodology
Expand Down
4 changes: 2 additions & 2 deletions thicket/tests/test_model_extrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
def test_model_extrap(mpi_scaling_cali):
from thicket.model_extrap import Modeling

t_ens = Thicket.from_caliperreader(mpi_scaling_cali)
t_ens = Thicket.from_caliperreader(mpi_scaling_cali, disable_tqdm=True)

# Method 1: Model created using metadata column
mdl = Modeling(
Expand Down Expand Up @@ -62,7 +62,7 @@ def test_model_extrap(mpi_scaling_cali):
def test_componentize_functions(mpi_scaling_cali):
from thicket.model_extrap import Modeling

t_ens = Thicket.from_caliperreader(mpi_scaling_cali)
t_ens = Thicket.from_caliperreader(mpi_scaling_cali, disable_tqdm=True)

mdl = Modeling(
t_ens,
Expand Down
2 changes: 1 addition & 1 deletion thicket/tests/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def check_query(th, hnids, query):

def test_query(rajaperf_cuda_block128_1M_cali):
# test thicket
th = Thicket.from_caliperreader(rajaperf_cuda_block128_1M_cali)
th = Thicket.from_caliperreader(rajaperf_cuda_block128_1M_cali, disable_tqdm=True)
# test arguments
hnids = [0, 1, 2, 3, 4] # 5, 6, 7 have Nones
query = (
Expand Down
Loading

0 comments on commit 24c1ed5

Please sign in to comment.