From a3835925d7f23d6af6eede314a350c1273eb3aee Mon Sep 17 00:00:00 2001 From: Jess <20195932+wrongkindofdoctor@users.noreply.github.com> Date: Fri, 25 Oct 2024 15:42:20 -0400 Subject: [PATCH] Add alternate standard names entry to fieldlists and varlistEntry objects (#699) * add alternate_standard_names entries to precipitation_flux vars in CMIP and GFDL fieldlists add list of applicable realms to precipitation flux * add alternate_standard_names attributes and property setters to DMDependentVariable class that is VarlistEntry parent class define realm param as string or list * extend realm search in fieldlist lookup tables to use a realm list in the translation add list to realm type hints in translation module * extend standard_name query to list that includes alternate_standard_names if present in the translation object * break up rainfall_flux and precipitation_flux entries in CMIP and GFDL field tables since translator can't parse realm list correctly * revert realm type hints defined as string or list and casting realm strings to lists in translation module * change assertion to log error if translation is None in varlist_util * define new standard_name for pp xarray vars using the translation standard_name if the query standard name is a list with alternates instead of a string --- data/fieldlist_CMIP.jsonc | 8 ++++++++ data/fieldlist_GFDL.jsonc | 25 ++++++++----------------- src/data_model.py | 17 ++++++++++++++++- src/preprocessor.py | 9 ++++++++- src/translation.py | 20 +++++++++++--------- src/varlist_util.py | 3 ++- 6 files changed, 53 insertions(+), 29 deletions(-) diff --git a/data/fieldlist_CMIP.jsonc b/data/fieldlist_CMIP.jsonc index 737ef1e09..e35f89d29 100644 --- a/data/fieldlist_CMIP.jsonc +++ b/data/fieldlist_CMIP.jsonc @@ -181,6 +181,14 @@ "standard_name": "precipitation_flux", "realm": "atmos", "units": "kg m-2 s-1", + "alternate_standard_names": ["rainfall_flux"], + "ndim": 3 + }, + "rainfall_flux": { + "standard_name": "rainfall_flux", + 
"realm": "seaIce", + "units": "kg m-2 s-1", + "alternate_standard_names": ["precipitation_flux"], "ndim": 3 }, "prc": { diff --git a/data/fieldlist_GFDL.jsonc b/data/fieldlist_GFDL.jsonc index d5da4341f..49f024d3f 100644 --- a/data/fieldlist_GFDL.jsonc +++ b/data/fieldlist_GFDL.jsonc @@ -186,28 +186,19 @@ "scalar_coord_templates": {"plev": "omega{value}"}, "ndim": 4 }, - // NOTE: pr is the same for sea Ice and ocean realms, and there are duplicate entries; - // TODO: refine realm entry parsing in framework to allow lists and strings - "pr": { - "standard_name": "rainfall_flux", - "long_name": "Surface Rainfall Rate into the Sea Ice Portion of the Grid Cell", - "realm": "seaIce", - "units": "kg m-2 s-1", - "ndim": 3 - }, - "pr": { - "standard_name": "rainfall_flux", - "long_name": "Surface Rainfall Rate into the Sea Ice Portion of the Grid Cell", - "realm": "ocean", - "units": "kg m-2 s-1", - "ndim": 3 - }, - "precip": { "standard_name": "precipitation_flux", "long_name":"", "realm": "atmos", "units": "kg m-2 s-1", + "alternate_standard_names": ["rainfall_flux"], + "ndim": 3 + }, + "rainfall_flux": { + "standard_name": "rainfall_flux", + "realm": "seaIce", + "units": "kg m-2 s-1", + "alternate_standard_names": ["precipitation_flux"], "ndim": 3 }, "prec_conv": { diff --git a/src/data_model.py b/src/data_model.py index 8a7046cb5..d6aa38120 100644 --- a/src/data_model.py +++ b/src/data_model.py @@ -126,6 +126,12 @@ def long_name(self): """ pass + @property + @abc.abstractmethod + def alternate_standard_names(self): + """Optional list of alternate variable standard_names to query""" + pass + class AbstractDMCoordinateBounds(AbstractDMDependentVariable): """Defines interface (set of attributes) for :class:`DMCoordinateBounds` @@ -764,6 +770,7 @@ class DMDependentVariable(_DMDimensionsMixin, AbstractDMDependentVariable): component: str = "" associated_files: str = "" rename_coords: bool = True + alternate_standard_names: list # dims: from _DMDimensionsMixin # 
scalar_coords: from _DMDimensionsMixin @@ -860,9 +867,17 @@ def realm(self): return self._realm @realm.setter - def realm(self, value: str): + def realm(self, value: str | list): self._realm = value + @property + def alternate_standard_names(self): + return self._alternate_standard_names + + @alternate_standard_names.setter + def alternate_standard_names(self, value: list): + self._alternate_standard_names = value + def add_scalar(self, ax, ax_value, **kwargs): """Metadata operation corresponding to taking a slice of a higher-dimensional variable (extracting its values at axis *ax* = *ax_value*). The diff --git a/src/preprocessor.py b/src/preprocessor.py index 388667ca4..faf1b70f5 100644 --- a/src/preprocessor.py +++ b/src/preprocessor.py @@ -931,6 +931,8 @@ def query_catalog(self, if var.translation.convention is not None: var_id = var.translation.name standard_name = var.translation.standard_name + if any(var.translation.alternate_standard_names): + standard_name = [var.translation.standard_name] + var.translation.alternate_standard_names date_range = var.translation.T.range if var.is_static: date_range = None @@ -1050,7 +1052,12 @@ def query_catalog(self, for vname in var_xr.variables: if (not isinstance(var_xr.variables[vname], xr.IndexVariable) and var_xr[vname].attrs.get('standard_name', None) is None): - var_xr[vname].attrs['standard_name'] = case_d.query.get('standard_name') + case_query_standard_name = case_d.query.get('standard_name') + if isinstance(case_query_standard_name, list): + new_standard_name = [name for name in case_query_standard_name if name == var.translation.standard_name][0] + else: + new_standard_name = case_query_standard_name + var_xr[vname].attrs['standard_name'] = new_standard_name var_xr[vname].attrs['name'] = vname if case_name not in cat_dict: cat_dict[case_name] = var_xr diff --git a/src/translation.py b/src/translation.py index ec5f961f1..ce090372f 100644 --- a/src/translation.py +++ b/src/translation.py @@ -106,6 +106,8 @@ def 
_process_var(section_name: str, in_dict, lut_dict): lut_dict['entries'][k].update({'long_name': ""}) if 'scalar_coord_templates' in v: sct_dict.update({k: v['scalar_coord_templates']}) + if 'alternate_standard_names' not in v: + lut_dict['entries'][k].update({'alternate_standard_names': list()}) return lut_dict, sct_dict d['axes_lut'] = util.WormDict() @@ -150,12 +152,13 @@ def to_CF_standard_name(self, standard_name: str, precip_vars = ['precipitation_rate', 'precipitation_flux'] # search the lookup table for the variable with the specified standard_name # realm, modifier, and long_name attributes + for var_name, var_dict in self.lut.items(): - if var_dict['standard_name'] == standard_name\ + if var_dict['standard_name'] == standard_name \ and var_dict['realm'] == realm\ and var_dict['modifier'] == modifier: # if not var_dict['long_name'] or var_dict['long_name'].lower() == long_name.lower(): - return var_name + return var_name else: if var_dict['standard_name'] in precip_vars and standard_name in precip_vars: return var_name @@ -176,7 +179,7 @@ def from_CF(self, TODO: expand with more ways to uniquely identify variable (eg cell methods). Args: standard_name: variable or name of the variable - realm: variable realm (atmos, ocean, land, ice, etc...) + realm: str variable realm (atmos, ocean, land, seaIce, etc...) modifier:optional string to distinguish a 3-D field from a 4-D field with the same var_or_name value long_name: str (optional) long name attribute of the variable @@ -213,8 +216,8 @@ def from_CF_name(self, convention. 
Args: - var_or_name: variable or name of the variable - realm: model realm of variable + var_or_name: str, variable or name of the variable + realm: str model realm of variable long_name: str (optional): long_name attribute of the variable modifier:optional string to distinguish a 3-D field from a 4-D field with the same var_or_name value @@ -253,7 +256,7 @@ def create_scalar_name(self, old_coord, new_coord: dict, var_id: str, log=_log) # construct convention's name for this variable on a level name_template = self.scalar_coord_templates[var_id][key] if new_coord.units.strip('').lower() == 'pa': - val = int(new_coord.value/100) + val = int(new_coord.value / 100) else: val = int(new_coord.value) @@ -304,8 +307,8 @@ def translate_coord(self, coord, class_dict=None, log=_log) -> dict: lut_val = v.get('value') if isinstance(coord.value, int) and isinstance(lut_val, str): v_int = int(float(lut_val)) - if v_int > coord.value and v_int/coord.value == 100 \ - or v_int < coord.value and coord.value/v_int == 100 or \ + if v_int > coord.value and v_int / coord.value == 100 \ + or v_int < coord.value and coord.value / v_int == 100 or \ v_int == coord.value: new_coord = v break @@ -376,7 +379,6 @@ def translate(self, var, from_convention: str): from_convention_tl = VariableTranslator().get_convention(from_convention) # Fieldlist entry for POD variable long_name = self.get_variable_long_name(var, has_scalar_coords) - fl_entries = from_convention_tl.from_CF(var.standard_name, var.realm, var.modifier, diff --git a/src/varlist_util.py b/src/varlist_util.py index 97332a4a7..f256acd5b 100644 --- a/src/varlist_util.py +++ b/src/varlist_util.py @@ -560,7 +560,8 @@ def setup_var(self, v.dest_path = self.variable_dest_path(model_paths, case_name, v) try: trans_v = translate.translate(v, from_convention) - assert trans_v is not None, f'translation for varlistentry {v.name} failed' + if trans_v is None: + v.log.error(f'translation for varlistEntry {v.name} failed') v.translation = trans_v 
# copy preferred gfdl post-processing component during translation if hasattr(trans_v, "component"):