From 4a0606ef01175df6593d7a9689a04f1f376db39e Mon Sep 17 00:00:00 2001 From: ilumsden Date: Fri, 9 Jun 2023 12:53:17 -0700 Subject: [PATCH 1/3] Adds support for Hatchet new-style queries in Thicket --- thicket/query.py | 60 ++++++++++++++++++++++++++++++++++++++++++++++ thicket/thicket.py | 36 ++++++++++++++++++++-------- 2 files changed, 86 insertions(+), 10 deletions(-) create mode 100644 thicket/query.py diff --git a/thicket/query.py b/thicket/query.py new file mode 100644 index 00000000..8007d9b0 --- /dev/null +++ b/thicket/query.py @@ -0,0 +1,60 @@ +# Copyright 2022 Lawrence Livermore National Security, LLC and other +# Thicket Project Developers. See the top-level LICENSE file for details. +# +# SPDX-License-Identifier: MIT + +from hatchet.query import ( + Query, + ObjectQuery, + StringQuery, + parse_string_dialect, + CompoundQuery, + ConjunctionQuery, + DisjunctionQuery, + ExclusiveDisjunctionQuery, + NegationQuery, + QueryEngine, + InvalidQueryPath, + InvalidQueryFilter, + RedundantQueryFilterWarning, + BadNumberNaryQueryArgs, +) + +from hatchet.query.compat import ( + AbstractQuery, + NaryQuery, + AndQuery, + IntersectionQuery, + OrQuery, + UnionQuery, + XorQuery, + SymDifferenceQuery, + NotQuery, + QueryMatcher, + CypherQuery, + parse_cypher_query, +) + +import hatchet.query.is_hatchet_query + +def is_thicket_query(query_obj): + return hatchet.query.is_hatchet_query(query_obj) + + +__all__ = [ + "Query", + "ObjectQuery", + "StringQuery", + "parse_string_dialect", + "CompoundQuery", + "ConjunctionQuery", + "DisjunctionQuery", + "ExclusiveDisjunctionQuery", + "NegationQuery", + "QueryEngine", + "InvalidQueryPath", + "InvalidQueryFilter", + "RedundantQueryFilterWarning", + "BadNumberNaryQueryArgs", + "is_thicket_query", +] diff --git a/thicket/thicket.py b/thicket/thicket.py index bffad465..d66ccb94 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -12,9 +12,15 @@ import pandas as pd import numpy as np from hatchet import GraphFrame -from 
hatchet.query import AbstractQuery import thicket.helpers as helpers +from thicket.query import ( + is_thicket_query, + ObjectQuery, + parse_string_dialect, + QueryEngine, + AbstractQuery, +) from .utils import verify_sorted_profile from .utils import verify_thicket_structures @@ -68,6 +74,7 @@ def __init__( self.statsframe = statsframe self.performance_cols = helpers._get_perf_columns(self.dataframe) + self.query_engine = QueryEngine() def __eq__(self, other): """Compare two thicket objects. @@ -1087,7 +1094,7 @@ def filter(self, filter_func): "Invalid function: thicket.filter(), please use thicket.filter_metadata() or thicket.filter_stats()" ) - def query(self, query_obj, squash=True, update_inc_cols=True): + def query(self, query_obj, squash=True, update_inc_cols=True, multi_index_mode="all"): """Apply a Hatchet query to the Thicket object. Arguments: @@ -1097,24 +1104,33 @@ def query(self, query_obj, squash=True, update_inc_cols=True): the query update_inc_cols (boolean, optional): if True, update inclusive columns when performing squash. + multi_index_mode (str, optional): select how to aggregate the results of a predicate. 
+ Can be "all" (default; requires the predicate to be True for all rows of data for a given + node), "any" (requires the predicate to be True for one or more rows of data for a given + node), or "off" (disables the use of query language dialects) Returns: (Thicket): a new Thicket object containing the data that matches the query """ - if isinstance(query_obj, (list, str)): - raise UnsupportedQuery( - "Object and String queries from Hatchet are not yet supported in Thicket" - ) - elif not issubclass(type(query_obj), AbstractQuery): + if not is_thicket_query(query_obj): raise TypeError( - "Input to 'query' must be a Hatchet query (i.e., list, str, or subclass of AbstractQuery)" + "Input to 'query' must be a Hatchet query (i.e., list, str, or new- or old-style query object)" + ) + if multi_index_mode == "off" and (isinstance(query_obj, list) or isinstance(query_obj, str)): + raise UnsupportedQuery( + "'Raw' object- and string-based dialect queries cannot be used when 'multi_index_mode' is set to 'off'" ) dframe_copy = self.dataframe.copy() index_names = self.dataframe.index.names dframe_copy.reset_index(inplace=True) query = query_obj - # TODO Add a conditional here to parse Object and String queries when supported - query_matches = query.apply(self) + if isinstance(query_obj, list): + query = ObjectQuery(query_obj, multi_index_mode=multi_index_mode) + elif isinstance(query_obj, str): + query = parse_string_dialect(query_obj, multi_index_mode=multi_index_mode) + elif issubclass(type(query_obj), AbstractQuery): + query = query_obj._get_new_query() + query_matches = self.query_engine.apply(query, self.graph, self.dataframe) filtered_df = dframe_copy.loc[dframe_copy["node"].isin(query_matches)] if filtered_df.shape[0] == 0: raise EmptyQuery("The provided query would have produced an empty Thicket.") From 335a49e9a5a7e0897ac638d04ca13208e6e5f066 Mon Sep 17 00:00:00 2001 From: ilumsden Date: Fri, 9 Jun 2023 14:37:42 -0700 Subject: [PATCH 2/3] Updates querying unit tests to 
account for new-style queries --- thicket/query.py | 1 + thicket/tests/test_query.py | 51 ++++++++++++++++++++++++++++++++++--- thicket/thicket.py | 8 ++++-- 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/thicket/query.py b/thicket/query.py index 8007d9b0..69bc5930 100644 --- a/thicket/query.py +++ b/thicket/query.py @@ -37,6 +37,7 @@ import hatchet.query.is_hatchet_query + def is_thicket_query(query_obj): return hatchet.query.is_hatchet_query(query_obj) diff --git a/thicket/tests/test_query.py b/thicket/tests/test_query.py index 8dd45a87..37e33e24 100644 --- a/thicket/tests/test_query.py +++ b/thicket/tests/test_query.py @@ -8,9 +8,10 @@ import hatchet as ht from thicket import Thicket +from thicket.query import Query, QueryMatcher -def check_query(th, hnids, query): +def check_query(th, hnids, query, multi_index_mode): """Check query function for Thicket object. Arguments: @@ -27,7 +28,7 @@ def check_query(th, hnids, query): match_frames = [node.frame for node in match] match_names = [frame["name"] for frame in match_frames] # Match all nodes using query - filt_th = th.query(query) + filt_th = th.query(query, multi_index_mode=multi_index_mode) filt_nodes = list(filt_th.graph.traverse()) # Get filtered nodes and profiles @@ -43,13 +44,55 @@ def check_query(th, hnids, query): ) -def test_query(rajaperf_basecuda_xl_cali): +def test_new_style_query_base(rajaperf_basecuda_xl_cali): # test thicket th = Thicket.from_caliperreader(rajaperf_basecuda_xl_cali) # test arguments hnids = [0, 1, 2, 3, 5, 6, 8, 9] query = ( - ht.QueryMatcher() + Query() + .match("*") + .rel( + ".", + lambda row: row["name"] + .apply(lambda x: re.match(r"Algorithm.*block_128", x) is not None) + .all(), + ) + ) + + check_query(th, hnids, query) + + +def test_new_style_query_object(rajaperf_basecuda_xl_cali): + # test thicket + th = Thicket.from_caliperreader(rajaperf_basecuda_xl_cali) + # test arguments + hnids = [0, 1, 2, 3, 5, 6, 8, 9] + query = ["*", {"name": 
"Algorithm.*block_128"}] + + check_query(th, hnids, query, multi_index_mode="all") + + +def test_new_style_query_string(rajaperf_basecuda_xl_cali): + # test thicket + th = Thicket.from_caliperreader(rajaperf_basecuda_xl_cali) + # test arguments + hnids = [0, 1, 2, 3, 5, 6, 8, 9] + query = """ + MATCH ("*")->(p) + WHERE p."name" =~ "Algorithm.*block_128" + """ + + check_query(th, hnids, query, multi_index_mode="all") + + +def test_old_style_query(rajaperf_basecuda_xl_cali): + # test thicket + th = Thicket.from_caliperreader(rajaperf_basecuda_xl_cali) + # test arguments + hnids = [0, 1, 2, 3, 5, 6, 8, 9] + query = ( + QueryMatcher() .match("*") .rel( ".", diff --git a/thicket/thicket.py b/thicket/thicket.py index d66ccb94..019666de 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1094,7 +1094,9 @@ def filter(self, filter_func): "Invalid function: thicket.filter(), please use thicket.filter_metadata() or thicket.filter_stats()" ) - def query(self, query_obj, squash=True, update_inc_cols=True, multi_index_mode="all"): + def query( + self, query_obj, squash=True, update_inc_cols=True, multi_index_mode="all" + ): """Apply a Hatchet query to the Thicket object. 
Arguments: @@ -1116,7 +1118,9 @@ def query(self, query_obj, squash=True, update_inc_cols=True, multi_index_mode=" raise TypeError( "Input to 'query' must be a Hatchet query (i.e., list, str, or new- or old-style query object)" ) - if multi_index_mode == "off" and (isinstance(query_obj, list) or isinstance(query_obj, str)): + if multi_index_mode == "off" and ( + isinstance(query_obj, list) or isinstance(query_obj, str) + ): raise UnsupportedQuery( "'Raw' object- and string-based dialect queries cannot be used when 'multi_index_mode' is set to 'off'" ) From 7a1620664b69be90ee9579fc54e94f60daf7a04a Mon Sep 17 00:00:00 2001 From: ilumsden Date: Fri, 9 Jun 2023 14:51:24 -0700 Subject: [PATCH 3/3] Fixes flake noqa issue --- thicket/query.py | 7 +++++-- thicket/tests/test_query.py | 6 ++---- thicket/thicket.py | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/thicket/query.py b/thicket/query.py index 69bc5930..a79bb47e 100644 --- a/thicket/query.py +++ b/thicket/query.py @@ -3,6 +3,9 @@ # # SPDX-License-Identifier: MIT +# Make flake8 ignore unused names in this file +# flake8: noqa: F401 + from hatchet.query import ( Query, ObjectQuery, @@ -35,11 +38,11 @@ parse_cypher_query, ) -import hatchet.query.is_hatchet_query +from hatchet.query import is_hatchet_query def is_thicket_query(query_obj): - return hatchet.query.is_hatchet_query(query_obj) + return is_hatchet_query(query_obj) __all__ = [ diff --git a/thicket/tests/test_query.py b/thicket/tests/test_query.py index 37e33e24..b5663023 100644 --- a/thicket/tests/test_query.py +++ b/thicket/tests/test_query.py @@ -5,8 +5,6 @@ import re -import hatchet as ht - from thicket import Thicket from thicket.query import Query, QueryMatcher @@ -60,7 +58,7 @@ def test_new_style_query_base(rajaperf_basecuda_xl_cali): ) ) - check_query(th, hnids, query) + check_query(th, hnids, query, multi_index_mode="off") def test_new_style_query_object(rajaperf_basecuda_xl_cali): @@ -102,4 +100,4 @@ def 
test_old_style_query(rajaperf_basecuda_xl_cali): ) ) - check_query(th, hnids, query) + check_query(th, hnids, query, multi_index_mode="off") diff --git a/thicket/thicket.py b/thicket/thicket.py index 019666de..8b51c86e 100644 --- a/thicket/thicket.py +++ b/thicket/thicket.py @@ -1114,7 +1114,7 @@ def query( Returns: (Thicket): a new Thicket object containing the data that matches the query """ - if not is_thicket_query(query_obj): + if not is_thicket_query(query_obj) and not isinstance(query_obj, (list, str)): raise TypeError( "Input to 'query' must be a Hatchet query (i.e., list, str, or new- or old-style query object)" )