
Merge pull request #46 from NOAA-GFDL/more-fixes
Various
ilaflott authored Mar 5, 2025
2 parents 2c992ed + 2250e1b commit bae3e7e
Showing 8 changed files with 111 additions and 106 deletions.
3 changes: 2 additions & 1 deletion Jinja2Filters/form_remap_dep.py
@@ -1,6 +1,7 @@
import re
import os
import metomi.rose.config
+ import ast
"""! form_remap_dep parses the remap_pp_components rose-app.conf and uses input from rose-suite.conf in the form of
env variables and returns the pp component and source name dependencies for remap_pp_components task execution. For
instance, for an atmos PP component that requires the regridded atmos_month and regridded atmos_daily history
@@ -107,7 +108,7 @@ def form_remap_dep(grid_type: str, temporal_type: str, chunk: str, pp_components
#print("DEBUG: Skipping as {} is requested, but not in rose-app config {}:".format(chunk, chunk_from_config))
continue

- results = node.get_value(keys=[item, 'sources']).split()
+ results = ast.literal_eval(node.get_value(keys=[item, 'sources']))
remap_comp = comp
answer = sorted(list(set(results)))
if remap_comp is not None:
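Note on the change above: the sources entries in rose-app.conf are now written as Python-style list literals rather than space-separated strings, so .split() no longer applies. A minimal sketch of the difference (the example values are hypothetical):

import ast

old_style = "atmos_month atmos_daily"          # space-separated string (old conf format)
new_style = "['atmos_month', 'atmos_daily']"   # Python list literal (new conf format)

print(old_style.split())            # ['atmos_month', 'atmos_daily']
print(ast.literal_eval(new_style))  # ['atmos_month', 'atmos_daily']
# ast.literal_eval raises on the old space-separated form, so the conf
# format and this parser had to change together.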
41 changes: 23 additions & 18 deletions Jinja2Filters/form_task_parameters.py
@@ -1,9 +1,16 @@
import re
import os
import metomi.rose.config
+ import ast
from pathlib import Path
import yaml

+ # set up logging
+ import logging
+ logging.basicConfig()
+ logger = logging.getLogger(__name__)
+ logger.setLevel(logging.INFO)
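The module-level logger above replaces the commented-out print() calls throughout this filter. One detail worth keeping in mind when converting print-style calls: logging uses lazy %-style formatting, so extra positional arguments need a matching placeholder, while f-strings sidestep the issue. A small illustrative sketch:

import logging
logging.basicConfig()
logger = logging.getLogger("example")
logger.setLevel(logging.DEBUG)

freq = "P0Y"
logger.debug("no P0Y here %s", freq)   # lazy %-style formatting: fine
logger.debug(f"no P0Y here {freq}")    # f-string: also fine
# logger.debug("no P0Y here", freq) has no placeholder for freq, and
# logging reports a formatting error when the record is emitted.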

def form_task_parameters(grid_type, temporal_type, pp_components_str, yamlfile):
"""Form the task parameter list based on the grid type, the temporal type,
and the desired pp component(s)
@@ -13,11 +20,8 @@ def form_task_parameters(grid_type, temporal_type, pp_components_str, yamlfile):
temporal_type (str): One of: temporal or static
pp_component (str): all, or a space-separated list
"""
- # Turn space separated pp_somponents into list
+ logger.debug(f"Desired pp components: {pp_components_str}")
pp_components = pp_components_str.split()
- #print("DEBUG: desired pp components:", pp_components)

# Path to remap rose-app.conf
path_to_conf = os.path.dirname(os.path.abspath(__file__)) + '/../app/remap-pp-components/rose-app.conf'
node = metomi.rose.config.load(path_to_conf)

@@ -40,23 +44,23 @@ def form_task_parameters(grid_type, temporal_type, pp_components_str, yamlfile):
if item == "env" or item == "command":
continue
comp = regex_pp_comp.match(item).group()
#print("DEBUG: Examining", item, comp)
logger.debug(f"Examining item '{item}' comp '{comp}'")

# skip if pp component not desired
if "all" not in pp_components:
#print("DEBUG: PP COMP", pp_components, "COMP is", comp)
if comp not in pp_components:
#print("DEBUG2: comp not in pp_components", pp_components, "and", comp)
continue
#print("DEBUG: Examining", item, comp)
logger.debug(f"Is {comp} in {pp_components}?")
if comp in pp_components:
logger.debug('Yes')
else:
logger.debug('No')
continue

# skip if grid type is not desired
# some grid types (i.e. regrid-xy) have subtypes (i.e. 1deg, 2deg)
# in remap-pp-components/rose-app.conf the grid type and subgrid is specified as "regrid-xy/1deg" (e.g.).
# So we will strip off after the slash and the remainder is the grid type
candidate_grid_type = re.sub(r'/.*', '', node.get_value(keys=[item, 'grid']))
if candidate_grid_type != grid_type:
#print("DEBUG: Skipping as not right grid; got", candidate_grid_type, "and wanted", grid_type)
logger.debug(f"Skipping as not right grid; got '{candidate_grid_type}' and wanted '{grid_type}'")
continue

# filter static and temporal
@@ -65,22 +69,23 @@ def form_task_parameters(grid_type, temporal_type, pp_components_str, yamlfile):
# if freq does not include "P0Y" => temporal
freq = node.get_value(keys=[item, 'freq'])
if freq is not None and 'P0Y' in freq and temporal_type == 'temporal':
#print("DEBUG: Skipping static when temporal is requested")
logger.debug("Skipping static when temporal is requested")
continue
if temporal_type == "static":
if freq is not None and 'P0Y' not in freq:
#print("DEBUG: Skipping as static is requested, no P0Y here", freq)
logger.debug("Skipping as static is requested, no P0Y here", freq)
continue
elif (temporal_type == "temporal"):
if freq is not None and 'P0Y' in freq:
#print("DEBUG: Skipping as temporal is requested, P0Y here", freq)
logger.debug("Skipping as temporal is requested, P0Y here", freq)
continue
else:
raise Exception("Unknown temporal type:", temporal_type)
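The static/temporal filtering above relies on a convention: a freq entry containing the zero-length ISO 8601 duration P0Y marks static (time-invariant) data. A condensed restatement of the checks, for illustration only:

def keep_item(freq, temporal_type):
    # keep statics (or entries with no freq) when statics are requested,
    # and non-statics (or entries with no freq) when temporals are requested
    if temporal_type == 'static':
        return freq is None or 'P0Y' in freq
    if temporal_type == 'temporal':
        return freq is None or 'P0Y' not in freq
    raise Exception(f"Unknown temporal type: {temporal_type}")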

- results = results + node.get_value(keys=[item, 'sources']).split()
+ # convert the string-form list to a real Python list
+ sources = ast.literal_eval(node.get_value(keys=[item, 'sources']))
+ results.extend(sources)

answer = sorted(list(set(results)))
logger.debug("Returning string" + ', '.join(answer))
return(', '.join(answer))

#print(form_task_parameters('native', 'temporal', 'land atmos land_cubic'))
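For reference, a hypothetical call under the current signature (the yaml path and component names are invented for illustration); the filter returns the de-duplicated, sorted source names as one comma-separated string:

params = form_task_parameters('regrid-xy', 'temporal',
                              'atmos land atmos_cubic', 'pp.yaml')
print(params)   # e.g. "atmos_daily, atmos_month, land_month"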
44 changes: 32 additions & 12 deletions Jinja2Filters/get_analysis_info.py
@@ -1,4 +1,3 @@
- from logging import getLogger
from pathlib import Path

import metomi.isodatetime.dumpers
@@ -7,9 +6,13 @@

from legacy_date_conversions import *

+ # set up logging
+ import logging
+ logging.basicConfig()
+ logger = logging.getLogger(__name__)
+ logger.setLevel(logging.INFO)

# Global variables just set to reduce typing a little.
- logger = getLogger(__name__)
duration_parser = metomi.isodatetime.parsers.DurationParser()
one_year = duration_parser.parse("P1Y")
time_dumper = metomi.isodatetime.dumpers.TimePointDumper()
@@ -50,6 +53,7 @@ def __init__(self, name, config, experiment_components, experiment_starting_date
experiment_stopping_date: Stopping date for the experiment.
"""
self.name = name
logger.debug(f"{name}: initializing AnalysisScript instance")

# Skip if configuration wants to skip it
self.switch = config["workflow"]["switch"]
@@ -94,11 +98,21 @@ def __init__(self, name, config, experiment_components, experiment_starting_date
self.is_legacy = False

self.data_frequency = config["required"]["data_frequency"]

+ # if dates are years, convert to string or else ISO conversion will fail
+ if isinstance(config["required"]["date_range"][0], int):
+     one = "{:04d}".format(config["required"]["date_range"][0])
+     two = "{:04d}".format(config["required"]["date_range"][1])
+ else:
+     one = config["required"]["date_range"][0]
+     two = config["required"]["date_range"][1]
self.date_range = [
-     time_parser.parse(config["required"]["date_range"][0]),
-     time_parser.parse(config["required"]["date_range"][1])
+     time_parser.parse(one),
+     time_parser.parse(two)
]
]

logger.debug(f"{name}: initialized instance")

def choose_pp_chunk(self, chunk1, chunk2):
"""Choose the most suitable postprocessing chunk size.
@@ -163,10 +177,10 @@ def graph(self, chunk, analysis_only)

graph = ""

#print(f"DEBUG: script frequency = {self.script_frequency}")
#print(f"DEBUG: chunk = {chunk}")
#print(f"DEBUG: analysis date range = {self.date_range}")
#print(f"DEBUG: exp date range = {self.experiment_date_range}")
logger.debug(f"script frequency = {self.script_frequency}")
logger.debug(f"chunk = {chunk}")
logger.debug(f"analysis date range = {self.date_range}")
logger.debug(f"exp date range = {self.experiment_date_range}")

if self.script_frequency == chunk and self.date_range == self.experiment_date_range \
and not self.cumulative:
@@ -371,7 +385,7 @@ def definition(self, chunk, pp_dir):
# corresponding to the interval (chunk), e.g. ANALYSIS-P1Y.
# Then, the analysis script will inherit from that family, to enable
# both the task triggering and the yr1 and datachunk template vars.
logger.info(f"ANALYSIS: {self.name}: Will run every chunk {chunk}")
logger.info(f"{self.name}: Will run every chunk {chunk}")
if self.is_legacy:
definitions += legacy_analysis_str
else:
@@ -414,6 +428,7 @@ def definition(self, chunk, pp_dir):
if not self.is_legacy:
definitions += install_str

logger.debug(f"{self.name}: Finished determining scripting")
return definitions

if self.script_frequency == chunk and self.date_range == self.experiment_date_range \
@@ -422,7 +437,7 @@ def definition(self, chunk, pp_dir):
# To make the task run, we will create a task family for
# each chunk/interval, starting from the beginning of pp data
# then we create an analysis script task for each of these task families.
logger.info(f"ANALYSIS: {self.name}: Will run each chunk {chunk} from beginning {self.experiment_date_range[0]}")
logger.info(f"{self.name}: Will run each chunk {chunk} from beginning {self.experiment_date_range[0]}")
date = self.experiment_date_range[0] + chunk - one_year
while date <= self.experiment_date_range[1]:
date_str = time_dumper.strftime(date, '%Y')
@@ -502,7 +517,7 @@ def definition(self, chunk, pp_dir):
pass
else:
raise NotImplementedError(f"ERROR: Non-supported analysis script configuration: {self.name}: run-once (R1), timeaverages, and non-accumulative is inconsistent, unless duration '{chunk}' represents {self.date_range[0]} through {self.date_range[1]} inclusive.")
logger.info(f"ANALYSIS: {self.name}: Will run once for time period {self.date_range[0]} to {self.date_range[1]} (chunks {d1_str} to {d2_str})\n")
logger.info(f"{self.name}: Will run once for time period {self.date_range[0]} to {self.date_range[1]} (chunks {d1_str} to {d2_str})")
date1_str = time_dumper.strftime(self.date_range[0], '%Y')
date2_str = time_dumper.strftime(self.date_range[1], '%Y')

@@ -568,7 +583,7 @@ def task_generator(yaml_, experiment_components, experiment_start, experiment_st
script_info = AnalysisScript(script_name, script_params, experiment_components,
experiment_start, experiment_stop)
if script_info.switch == False:
logger.info("must skip analysis {script_name}.")
logger.info(f"{script_name}: Skipping, switch set to off")
continue
yield script_info

@@ -588,10 +603,12 @@ def task_definitions(yaml_, experiment_components, experiment_start, experiment_
Returns:
String containing the task definitions.
"""
logger.debug("About to generate all task definitions")
definitions = ""
for script_info in task_generator(yaml_, experiment_components, experiment_start, experiment_stop):
chunk = script_info.choose_pp_chunk(chunk1, chunk2)
definitions += script_info.definition(chunk, pp_dir)
logger.debug("Finished generating all task definitions")
return definitions


@@ -635,6 +652,7 @@ def get_analysis_info(experiment_yaml, info_type, experiment_components, pp_dir,
chunk2 (str): Larger chunk size (optional)
analysis_only (bool): make task graphs not depend on REMAP-PP-COMPONENTS
"""
logger.debug("get_analysis_info: starting")
# Convert strings to date objects.
experiment_start = time_parser.parse(experiment_start)
experiment_stop = time_parser.parse(experiment_stop)
@@ -648,9 +666,11 @@
with open(experiment_yaml) as file_:
yaml_ = safe_load(file_)
if info_type == "task-graph":
logger.debug("get_analysis_info: about to return graph")
return task_graph(yaml_, experiment_components, experiment_start,
experiment_stop, chunk1, chunk2, analysis_only)
elif info_type == "task-definitions":
logger.debug("get_analysis_info: about to return definitions")
return task_definitions(yaml_, experiment_components, experiment_start,
experiment_stop, chunk1, chunk2, pp_dir)
raise ValueError(f"Invalid information type: {info_type}.")
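A side note on the date_range padding added in AnalysisScript.__init__ above: YAML loads an unquoted year such as 1979 as an int, while the ISO 8601 parser expects a string, so bare years are zero-padded to four digits first. A sketch of the guard in isolation (assuming metomi.isodatetime is installed):

import metomi.isodatetime.parsers

time_parser = metomi.isodatetime.parsers.TimePointParser()

date_range = [1979, 1988]   # YAML yields ints for unquoted years
padded = ["{:04d}".format(d) if isinstance(d, int) else d for d in date_range]
print(padded)                         # ['1979', '1988']
start = time_parser.parse(padded[0])  # a TimePoint for the year 1979
# time_parser.parse(1979) would fail: the parser operates on strings.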
30 changes: 16 additions & 14 deletions app/regrid-xy/regrid_xy.py
@@ -10,6 +10,7 @@
import shutil
import os
from pathlib import Path
+ import ast

#3rd party
import metomi.rose.config as rose_cfg
@@ -122,20 +123,21 @@ def check_per_component_settings(component_list, rose_app_cfg):
def make_component_list(config, source):
'''make list of relevant component names where source file appears in sources'''
comp_list=[] #will not contain env, or command
- try:
-     for keys, sub_node in config.walk():
-         sources = sub_node.get_value(keys=['sources'])#.split(' ')
-         if any( [ len(keys) < 1,
-                   keys[0] in ['env', 'command'],
-                   keys[0] in comp_list,
-                   sources is None ] ):
-             continue
-         sources = sources.split(' ')
-         if source in sources:
-             comp_list.append(keys[0])
- except Exception as exc:
-     raise Exception(f'config = {config} may be an empty file... check the config') \
-         from exc
+ for keys, sub_node in config.walk():
+     # only consider top-level component sections
+     if len(keys) != 1:
+         continue
+
+     # skip the env and command sections
+     item = keys[0]
+     if item == "env" or item == "command":
+         continue
+
+     # convert the string-form list to a real Python list
+     sources = ast.literal_eval(config.get_value(keys=[item, 'sources']))
+
+     if source in sources:
+         comp_list.append(item)
return comp_list


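A hypothetical walk-through of the rewritten make_component_list, with an invented rose-app.conf fragment; every top-level section other than env and command names a component, and its sources value is now a Python list literal:

# rose-app.conf (hypothetical contents):
#   [atmos]
#   sources=['atmos_month', 'atmos_daily']
#   [land]
#   sources=['land_month']

import metomi.rose.config as rose_cfg

config = rose_cfg.load('rose-app.conf')            # hypothetical path
print(make_component_list(config, 'atmos_month'))  # ['atmos']
print(make_component_list(config, 'land_month'))   # ['land']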
