Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add initial version of ccpp_track_variables.py #419

Merged
merged 34 commits into from
May 11, 2022
Merged
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
f2619d9
Initial shell version of ccpp_track_variables.py
mkavulich Oct 14, 2021
2f209a5
A few structure changes, add function for parsing arguments and check…
mkavulich Oct 14, 2021
5715006
Change "xml" to "sdf" to better reflect other script conventions, fle…
mkavulich Nov 1, 2021
4160be2
Starting to make use of existing objects
mkavulich Nov 2, 2021
e9da740
Create new method and attribute for Suite class that creates a call t…
mkavulich Nov 2, 2021
31245e1
Changing directions a little: user must provide path to directory wit…
mkavulich Nov 9, 2021
83f733f
Add logging routines; debug flag (not utilized yet)
mkavulich Nov 11, 2021
6cb14c9
working on getting dictionary of schemes <--> meta filenames
mkavulich Nov 11, 2021
0ceb853
Read in config file instead of metadata_path (maybe can revisit this …
mkavulich Nov 12, 2021
a332867
Need to add call to gather_variable_definitions in order to get new m…
mkavulich Nov 12, 2021
e64dbe3
Finally got to where I thought I should be! I now have a calling tree…
mkavulich Nov 12, 2021
64dad53
Getting close to finished now; using parse_metadata_file to return Me…
mkavulich Nov 15, 2021
67229d2
Find if variable matches in subroutine, if so, output variable name, …
mkavulich Nov 15, 2021
36a23e6
Working prototype complete! The script now takes in an SDF, config fi…
mkavulich Nov 15, 2021
5f9e029
Code cleanup with feedback from pylint
mkavulich Nov 16, 2021
5e03756
Remove unneeded debug changes
mkavulich Nov 16, 2021
8ec769d
Add new "--draw" argument and a stub (for now) subroutine that will g…
mkavulich Nov 16, 2021
6c7cfe9
Convert var_graph from Ordered Dictionary to list of tuples to preser…
mkavulich Nov 18, 2021
9fa53bb
Improve function descriptions, remove bits of draw routine until later
mkavulich Nov 18, 2021
d64a0fb
Don't raise exception if partial matches found
mkavulich Feb 3, 2022
c2012ba
Merge remote-tracking branch 'origin/main' into feature/track_variabl…
mkavulich Feb 24, 2022
08c2027
Explicitly shebang to python3, adopt new environment and logging stru…
mkavulich Feb 24, 2022
566e485
Remove unnecessary "success" variables and handle exceptions in the a…
mkavulich Feb 24, 2022
9e6b59a
modify --> use; this script tracks variables that are both intent(in)…
mkavulich Feb 24, 2022
bc6963a
Convert remaining old-format strings to f-strings
mkavulich Feb 24, 2022
1193e44
Incorporate reviewer suggestion for more robust directory name parsing
mkavulich Feb 24, 2022
c7cc6e4
A few more fixes from pylint, remove redundant "action='store_true'" …
mkavulich Mar 3, 2022
12d9078
Move parsing of command-line arguments to "parse_arguments" function,…
mkavulich Mar 3, 2022
ee99e7e
Add a bit more information about call to "gather_variable_definitions…
mkavulich Mar 3, 2022
b8654c4
Assign "call_tree" attribute as an empty list rather than Nonetype in…
mkavulich Mar 3, 2022
5f545d7
restore accidentally removed store_true action from debug argument
mkavulich Mar 3, 2022
c27f06f
Suggestion from Dom: move creation of scheme call tree to "parse" met…
mkavulich Mar 24, 2022
3f1dd5b
Standardize spaces around `=` character
mkavulich Apr 21, 2022
948b620
Merge remote-tracking branch 'origin/main' into feature/track_variabl…
mkavulich May 10, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 191 additions & 0 deletions scripts/ccpp_track_variables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
#!/usr/bin/env python3

# Standard modules
import os
import argparse
import logging
import glob

# CCPP framework imports
from metadata_table import find_scheme_names, parse_metadata_file
from ccpp_prebuild import import_config, gather_variable_definitions
from mkstatic import Suite
from parse_checkers import registered_fortran_ddt_names
from parse_tools import init_log, set_log_level
from framework_env import CCPPFrameworkEnv

###############################################################################
# Set up the command line argument parser and other global variables #
###############################################################################

###############################################################################
# Functions and subroutines #
###############################################################################

def parse_arguments(argv=None):
    """Parse command line arguments.

    Args:
        argv: optional list of argument strings to parse instead of the
            process command line. When None (the default) argparse reads
            sys.argv[1:], so existing callers are unaffected.

    Returns:
        argparse.Namespace with the string attributes ``sdf``,
        ``metadata_path``, ``config`` and ``variable`` (all required) and the
        boolean attribute ``debug`` (False unless ``--debug`` is given).

    Raises:
        SystemExit: raised by argparse when a required argument is missing or
            an unknown argument is supplied.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--sdf', help='suite definition file to parse', required=True)
    parser.add_argument('-m', '--metadata_path',
                        help='path to CCPP scheme metadata files', required=True)
    parser.add_argument('-c', '--config',
                        help='path to CCPP prebuild configuration file', required=True)
    parser.add_argument('-v', '--variable', help='variable to track through CCPP suite',
                        required=True)
    # 'store_true' already defaults to False, so no explicit default is needed
    parser.add_argument('--debug', action='store_true', help='enable debugging output')

    return parser.parse_args(argv)

def setup_logging(debug):
    """Create the 'ccpp_track_variables' logger and set its verbosity.

    The logger is obtained from the capgen helper ``init_log``. When ``debug``
    is true the level is set to DEBUG and that fact is logged; otherwise the
    level is set to WARNING. The configured logger is returned.
    """
    # Use capgen logging tools
    track_logger = init_log('ccpp_track_variables')

    if not debug:
        set_log_level(track_logger, logging.WARNING)
        return track_logger

    set_log_level(track_logger, logging.DEBUG)
    track_logger.info('Logging level set to DEBUG')
    return track_logger

def parse_suite(sdf, run_env):
    """Read the provided sdf and parse it into a Suite data structure.

    The suite is parsed with ``make_call_tree=True`` so that the returned
    object also carries the "call tree": the ordered list of schemes for the
    suite specified by the provided sdf.

    Args:
        sdf: path to the suite definition file.
        run_env: object exposing a ``logger`` attribute used for progress messages.

    Returns:
        The parsed Suite object.

    Raises:
        Exception: if ``Suite.parse`` reports failure.
    """
    # Log once; the original emitted this identical message both before and
    # after constructing the Suite object.
    run_env.logger.info(f'Reading sdf {sdf} and populating Suite object')
    suite = Suite(sdf_name=sdf)
    success = suite.parse(make_call_tree=True)
    if not success:
        raise Exception(f'Parsing suite definition file {sdf} failed.')
    run_env.logger.info(f'Successfully read sdf {suite.sdf_name}')
    return suite

def create_metadata_filename_dict(metapath):
    """Map every scheme found in the .meta files of a directory to its file.

    Given a path, read all .meta files in that directory and add them to a
    dictionary: the keys are the names of the schemes, and the values are the
    filename of the .meta file associated with that scheme.

    Args:
        metapath: directory that is searched (non-recursively) for "*.meta" files.

    Returns:
        dict mapping scheme name -> path of the .meta file that declares it.

    Raises:
        Exception: if no file with a ".meta" extension is found in ``metapath``.
    """
    # glob returns matches in arbitrary order; sort so that, should the same
    # scheme name appear in more than one file, the chosen file does not
    # depend on the filesystem's directory ordering.
    scheme_filenames = sorted(glob.glob(os.path.join(metapath, "*.meta")))
    if not scheme_filenames:
        raise Exception(f'No files found in {metapath} with ".meta" extension')

    metadata_dict = {}
    for scheme_fn in scheme_filenames:
        # find_scheme_names returns a list of schemes in each filename, but
        # we want a dictionary of schemes associated with filenames:
        for scheme in find_scheme_names(scheme_fn):
            metadata_dict[scheme] = scheme_fn

    return metadata_dict


def create_var_graph(suite, var, config, metapath, run_env):
    """Given a suite, variable name, a 'config' dictionary, and a path to .meta files:
         1. Creates a dictionary associating schemes with their .meta files
         2. Loops through the call tree of the provided suite
         3. For each scheme, reads .meta file for said scheme, checks for variable within that
            scheme, and if it exists, adds an entry to a list of tuples, where each tuple includes
            the name of the scheme and the intent of the variable within that scheme

    Args:
        suite: parsed suite object providing ``call_tree`` and ``sdf_name``.
        var: standard name (or fragment of one) to look for.
        config: prebuild configuration dictionary; only ``config['scheme_files']``
            is read here, for a debug message.
        metapath: directory containing the scheme .meta files.
        run_env: framework environment; its ``logger`` is used for messages and
            it is passed through to ``parse_metadata_file``.

    Returns:
        Tuple ``(success, var_graph)``. ``var_graph`` is a list of
        ``(section title, intent)`` tuples in call-tree order, one per metadata
        section with an exact standard-name match. ``success`` is True when
        that list is non-empty.

    Raises:
        Exception: if a scheme in the call tree has no associated .meta file
            in ``metapath``.
    """

    # Create a list of tuples that will hold the in/out information for each scheme
    var_graph = []

    run_env.logger.debug(f"reading .meta files in path:\n {metapath}")
    metadata_dict = create_metadata_filename_dict(metapath)

    run_env.logger.debug(f"reading metadata files for schemes defined in config file: "
                         f"{config['scheme_files']}")

    # Partial matches are standard names that merely contain `var`. They help
    # users who do not know the exact standard name: e.g. "latent_heat" matches
    # latent_heat_of_vaporization_of_water_at_0c and
    # surface_upward_potential_latent_heat_flux. They are only reported when
    # no exact match is found anywhere in the suite.
    partial_matches = {}

    # Loop through call tree, find matching filename for scheme via metadata_dict,
    # then parse that metadata file to find variable info
    for scheme in suite.call_tree:
        run_env.logger.debug(f"reading meta file for scheme {scheme} ")

        scheme_filename = metadata_dict.get(scheme)
        if scheme_filename is None:
            raise Exception(f"Error, scheme '{scheme}' from suite '{suite.sdf_name}' "
                            f"not found in metadata files in {metapath}")

        run_env.logger.debug(f"reading metadata file {scheme_filename} for scheme {scheme}")

        new_metadata_headers = parse_metadata_file(scheme_filename,
                                                   known_ddts=registered_fortran_ddt_names(),
                                                   run_env=run_env)
        for scheme_metadata in new_metadata_headers:
            for section in scheme_metadata.sections():
                # Initialise per-section state up front so it is defined even
                # when the section has no variables at all.
                inexact_names = []
                exact_match = False
                intent = ''
                for scheme_var in section.variable_list():
                    standard_name = scheme_var.get_prop_value('standard_name')
                    if var == standard_name:
                        run_env.logger.debug(f"Found variable {var} in scheme {section.title}")
                        exact_match = True
                        intent = scheme_var.get_prop_value('intent')
                        # An exact match supersedes any partial ones in this section
                        break
                    if var in standard_name:
                        run_env.logger.debug(f"{var} matches {standard_name}")
                        inexact_names.append(standard_name)

                if exact_match:
                    run_env.logger.debug(f"Exact match found for variable {var} in scheme {section.title},"
                                         f" intent {intent}")
                    var_graph.append((section.title, intent))
                elif inexact_names:
                    run_env.logger.debug(f"Found inexact matches for variable(s) {var} "
                                         f"in scheme {section.title}:\n{inexact_names}")
                    partial_matches[section.title] = inexact_names
                else:
                    run_env.logger.debug(f"Did not find variable {var} in scheme {section.title}")

    success = bool(var_graph)
    if success:
        run_env.logger.debug(f"Successfully generated variable graph for sdf {suite.sdf_name}\n")
    else:
        run_env.logger.error(f"Variable {var} not found in any suites for sdf {suite.sdf_name}\n")
        if partial_matches:
            print("Did find partial matches that may be of interest:\n")
            for key, names in partial_matches.items():
                print(f"In {key} found variable(s) {names}")

    return (success, var_graph)

def main():
    """Main routine that traverses a CCPP suite and outputs the list of schemes that use given variable

    Steps: parse the command line, set up logging and the framework
    environment, parse the suite definition file, import the prebuild
    configuration, then build and print the variable graph.

    Raises:
        Exception: if import_config or gather_variable_definitions report failure.
    """

    args = parse_arguments()
    logger = setup_logging(args.debug)

    # Use new capgen class CCPPFrameworkEnv
    run_env = CCPPFrameworkEnv(logger, host_files="", scheme_files="", suites="")

    suite = parse_suite(args.sdf, run_env)

    config_ok, config = import_config(args.config, None)
    if not config_ok:
        raise Exception('Call to import_config failed.')

    # Variables defined by the host model. The original rationale for this
    # call was that it converts some old metadata formats for later use.
    # NOTE(review): a reviewer of this change observed that
    # gather_variable_definitions does not appear to act on its inputs in
    # place, and its returned data is discarded here, so this call may only
    # serve as a validity check -- confirm before relying on the conversion.
    definitions_ok, _, _ = gather_variable_definitions(config['variable_definition_files'],
                                                       config['typedefs_new_metadata'])
    if not definitions_ok:
        raise Exception('Call to gather_variable_definitions failed.')

    found, var_graph = create_var_graph(suite, args.variable, config, args.metadata_path, run_env)
    if not found:
        # create_var_graph has already reported the failure
        return

    print(f"For suite {suite.sdf_name}, the following schemes (in order) "
          f"use the variable {args.variable}:")
    for scheme_name, intent in var_graph:
        print(f"{scheme_name} (intent {intent})")


# Run the tracker only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
2 changes: 1 addition & 1 deletion scripts/metadata_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
# Output: This routine converts the argument tables for all subroutines / typedefs / kind / module variables
# into dictionaries suitable to be used with ccpp_prebuild.py (which generates the fortran code for the caps)

# Items in this dictionary are used for checking valid entries in metadata tables. For columsn with no keys/keys
# Items in this dictionary are used for checking valid entries in metadata tables. For columns with no keys/keys
# commented out, no check is performed. This is the case for 'type' and 'kind' right now, since models use their
# own derived data types and kind types.
VALID_ITEMS = {
Expand Down
20 changes: 19 additions & 1 deletion scripts/mkstatic.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,7 @@ def __init__(self, **kwargs):
self._sdf_name = None
self._all_schemes_called = None
self._all_subroutines_called = None
self._call_tree = []
self._caps = None
self._module = None
self._subroutines = None
Expand Down Expand Up @@ -545,7 +546,7 @@ def update_cap(self):
def update_cap(self, value):
self._update_cap = value

def parse(self):
def parse(self, make_call_tree=False):
'''Parse the suite definition file.'''
success = True

Expand All @@ -568,6 +569,10 @@ def parse(self):
self._all_schemes_called = []
self._all_subroutines_called = []

if make_call_tree:
# Call tree of all schemes in SDF (with duplicates and subcycles)
self._call_tree = []

# Build hierarchical structure as in SDF
self._groups = []
for group_xml in suite_xml:
Expand All @@ -594,14 +599,22 @@ def parse(self):
loop=int(subcycle_xml.get('loop'))
for ccpp_stage in CCPP_STAGES:
self._all_subroutines_called.append(scheme_xml.text + '_' + CCPP_STAGES[ccpp_stage])

subcycles.append(Subcycle(loop=loop, schemes=schemes))

if make_call_tree:
# Populate call tree from SDF's hierarchical structure, including multiple calls in subcycle loops
for loop in range(0,int(subcycle_xml.get('loop'))):
for scheme_xml in subcycle_xml:
self._call_tree.append(scheme_xml.text)

self._groups.append(Group(name=group_xml.get('name'), subcycles=subcycles, suite=self._name))

# Remove duplicates from list of all subroutines and schemes
self._all_schemes_called = list(set(self._all_schemes_called))
self._all_subroutines_called = list(set(self._all_subroutines_called))


return success

def print_debug(self):
Expand All @@ -618,6 +631,11 @@ def all_schemes_called(self):
'''Get the list of all schemes.'''
return self._all_schemes_called

@property
def call_tree(self):
'''Get the call tree of the suite (all schemes, in order, with duplicates and loops).'''
return self._call_tree

@property
def all_subroutines_called(self):
'''Get the list of all subroutines.'''
Expand Down