From bbf8d5f08cd46fbdb0e66200a08382fa32aeabea Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Wed, 22 Mar 2023 07:13:46 +0100 Subject: [PATCH 01/16] first version --- esmvaltool/cmorizers/data/datasets.yml | 10 ++ .../data/downloaders/datasets/jra_55.py | 98 +++++++++++++++++++ esmvaltool/cmorizers/data/downloaders/wget.py | 14 +++ 3 files changed, 122 insertions(+) create mode 100644 esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index 096fd7b693..ff48a0db3d 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -765,6 +765,16 @@ datasets: mon/atmos/rsut/rsut_Amon_reanalysis_JRA-25_197901-201312.nc mon/atmos/rsutcs/rsutcs_Amon_reanalysis_JRA-25_197901-201312.nc + JRA-55: + tier: 2 + source: https://rda.ucar.edu/data/ds628.1/ + last_access: 2023-03-17 + info: | + Create an account on the research data archive (RDA) in order to be able + to download the data (1.25 degree, pressure levels). See + https://rda.ucar.edu/login/register/ and + https://rda.ucar.edu/datasets/ds628.1/ for more details. + Kadow2020: tier: 2 source: http://users.met.fu-berlin.de/~ChristopherKadow/ diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py b/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py new file mode 100644 index 0000000000..13e897cc14 --- /dev/null +++ b/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py @@ -0,0 +1,98 @@ +"""Script to download JRA-55 from RDA.""" +import logging +import os + +from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader + +from datetime import datetime + +from dateutil import relativedelta + + +logger = logging.getLogger(__name__) + + +def download_dataset(config, dataset, dataset_info, start_date, end_date, + overwrite): + """Download dataset. 
+ + Parameters + ---------- + config : dict + ESMValTool's user configuration + dataset : str + Name of the dataset + dataset_info : dict + Dataset information from the datasets.yml file + start_date : datetime + Start of the interval to download + end_date : datetime + End of the interval to download + overwrite : bool + Overwrite already downloaded files + """ + downloader = WGetDownloader( + config=config, + dataset=dataset, + dataset_info=dataset_info, + overwrite=overwrite, + ) + + os.makedirs(downloader.local_folder, exist_ok=True) + +# user = os.environ.get("rda-user") +# if (user is None): +# user = str(input("RDA user name? ")) +# if (user is ""): +# print("A RDA account is required to download JRA-55 data.") +# print("Please visit https://rda.ucar.edu/login/register/") +# print("to create an account at the Research Data Archive (RDA)") +# print("if needed.") +# exit() +# +# passwd = os.environ.get("rda-passwd") +# if (passwd is None): +# passwd = str(input("RDA password? ")) + + if start_date is None: +# start_date = datetime(1958, 1, 1) + start_date = datetime(2022, 12, 1) + if end_date is None: + end_date = datetime(2022, 12, 31) + loop_date = start_date + + options = ["-O", "Authentication.log", "--save-cookies=auth.rda_ucar_edu", + f"--post-data=\"email={user}&passwd={passwd}&action=login\""] + + # login to Research Data Archive (RDA) + +# downloader.login("https://rda.ucar.edu/cgi-bin/login", options) + + # download files + + url = "https://rda.ucar.edu/data/ds628.1" + download_options = ["--load-cookies=auth.rda_ucar_edu"] + path = downloader.local_folder + + while loop_date <= end_date: + year = loop_date.year + month = loop_date.month + + fname = f"anl_p125.039_vvel.{year}01_{year}12" + print(fname) + downloader.download_file(url + f"/anl_p125/{year}/" + + fname, download_options) + os.rename(downloader.local_folder + "/" + fname, + downloader.local_folder + "/" + fname + ".grb") + + + + loop_date += relativedelta.relativedelta(months=1) + + # add 
extension ".grb" to downloaded files + +# files = os.listdir(path) +# +# for index, file in enumerate(files): +# os.rename(os.path.join(path, file), +# os.path.join(path, ''.join([file, '.grb']))) diff --git a/esmvaltool/cmorizers/data/downloaders/wget.py b/esmvaltool/cmorizers/data/downloaders/wget.py index 9db7afc478..413066c60b 100644 --- a/esmvaltool/cmorizers/data/downloaders/wget.py +++ b/esmvaltool/cmorizers/data/downloaders/wget.py @@ -54,6 +54,20 @@ def download_file(self, server_path, wget_options): logger.debug(command) subprocess.check_output(command) + def login(self, server_path, wget_options): + """Login. + + Parameters + ---------- + server_path: str + Path to remote file + wget_options: list(str) + Extra options for wget + """ + command = ['wget'] + wget_options + [server_path] + logger.debug(command) + subprocess.check_output(command) + @property def overwrite_options(self): """Get overwrite options as configured in downloader.""" From 370e51626521fdabe8f75dc8a9e1bb097a6aca16 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Wed, 22 Mar 2023 08:39:01 +0100 Subject: [PATCH 02/16] first working version of jra-55 downloader --- esmvaltool/cmorizers/data/datasets.yml | 5 +- .../data/downloaders/datasets/jra_55.py | 84 +++++++++++-------- 2 files changed, 51 insertions(+), 38 deletions(-) diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index ff48a0db3d..a19258be26 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -767,13 +767,12 @@ datasets: JRA-55: tier: 2 - source: https://rda.ucar.edu/data/ds628.1/ + source: https://rda.ucar.edu/datasets/ds628.1/ last_access: 2023-03-17 info: | Create an account on the research data archive (RDA) in order to be able to download the data (1.25 degree, pressure levels). See - https://rda.ucar.edu/login/register/ and - https://rda.ucar.edu/datasets/ds628.1/ for more details. 
+ https://rda.ucar.edu/login/register/ for more details. Kadow2020: tier: 2 diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py b/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py index 13e897cc14..20c3ffa7cd 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py @@ -2,12 +2,12 @@ import logging import os -from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader - from datetime import datetime from dateutil import relativedelta +from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader + logger = logging.getLogger(__name__) @@ -40,23 +40,23 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, os.makedirs(downloader.local_folder, exist_ok=True) -# user = os.environ.get("rda-user") -# if (user is None): -# user = str(input("RDA user name? ")) -# if (user is ""): -# print("A RDA account is required to download JRA-55 data.") -# print("Please visit https://rda.ucar.edu/login/register/") -# print("to create an account at the Research Data Archive (RDA)") -# print("if needed.") -# exit() -# -# passwd = os.environ.get("rda-passwd") -# if (passwd is None): -# passwd = str(input("RDA password? ")) + user = os.environ.get("rda-user") + if user is None: + user = str(input("RDA user name? ")) + if user == "": + errmsg = ("A RDA account is required to download JRA-55 data." + " Please visit https://rda.ucar.edu/login/register/" + " to create an account at the Research Data Archive" + " (RDA) if needed.") + logger.error(errmsg) + raise ValueError + + passwd = os.environ.get("rda-passwd") + if passwd is None: + passwd = str(input("RDA password? 
")) if start_date is None: -# start_date = datetime(1958, 1, 1) - start_date = datetime(2022, 12, 1) + start_date = datetime(1958, 1, 1) if end_date is None: end_date = datetime(2022, 12, 31) loop_date = start_date @@ -66,33 +66,47 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, # login to Research Data Archive (RDA) -# downloader.login("https://rda.ucar.edu/cgi-bin/login", options) + downloader.login("https://rda.ucar.edu/cgi-bin/login", options) # download files url = "https://rda.ucar.edu/data/ds628.1" download_options = ["--load-cookies=auth.rda_ucar_edu"] - path = downloader.local_folder + + # define variables to download + + var = {"039_vvel": "anl_p125", + "011_tmp": "anl_surf125", + "054_pwat": "fcst_column125", + "058_cice": "fcst_column125", + "227_cw": "fcst_column125", + "228_clwc": "fcst_p125", + "229_ciwc": "fcst_p125", + "160_csusf": "fcst_phy2m125", + "161_csdsf": "fcst_phy2m125", + "204_dswrf": "fcst_phy2m125", + "211_uswrf": "fcst_phy2m125", + "212_ulwrf": "fcst_phy2m125"} + + # download data while loop_date <= end_date: year = loop_date.year - month = loop_date.month - - fname = f"anl_p125.039_vvel.{year}01_{year}12" - print(fname) - downloader.download_file(url + f"/anl_p125/{year}/" + - fname, download_options) - os.rename(downloader.local_folder + "/" + fname, - downloader.local_folder + "/" + fname + ".grb") - + for var, channel in var.items(): + fname = f"{channel}.{var}.{year}01_{year}12" + # download file + downloader.download_file(url + f"/{channel}/{year}/" + + fname, download_options) + # add file extension ".grb" + os.rename(downloader.local_folder + "/" + fname, + downloader.local_folder + "/" + fname + ".grb") - loop_date += relativedelta.relativedelta(months=1) + loop_date += relativedelta.relativedelta(years=1) - # add extension ".grb" to downloaded files + # clean up temporary files -# files = os.listdir(path) -# -# for index, file in enumerate(files): -# os.rename(os.path.join(path, file), -# 
os.path.join(path, ''.join([file, '.grb']))) + if os.path.exists("Authentication.log"): + os.remove("Authentication.log") + if os.path.exists("auth.rda_ucar_edu"): + os.remove("auth.rda_ucar_edu") From 3b88dec2711a243f96547d5fa47e0e43e0ca8216 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Wed, 22 Mar 2023 08:44:37 +0100 Subject: [PATCH 03/16] fixed flake8 issues --- esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py b/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py index 20c3ffa7cd..1ca6e916d9 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py @@ -62,7 +62,7 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, loop_date = start_date options = ["-O", "Authentication.log", "--save-cookies=auth.rda_ucar_edu", - f"--post-data=\"email={user}&passwd={passwd}&action=login\""] + f"--post-data=\"email={user}&passwd={passwd}&action=login\""] # login to Research Data Archive (RDA) @@ -97,10 +97,10 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, fname = f"{channel}.{var}.{year}01_{year}12" # download file downloader.download_file(url + f"/{channel}/{year}/" + - fname, download_options) + fname, download_options) # add file extension ".grb" os.rename(downloader.local_folder + "/" + fname, - downloader.local_folder + "/" + fname + ".grb") + downloader.local_folder + "/" + fname + ".grb") loop_date += relativedelta.relativedelta(years=1) From 2e0876979b0607ec12db0400f4eeb1f629a075ec Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Fri, 24 Mar 2023 15:04:39 +0100 Subject: [PATCH 04/16] snapshot 2023-03-23 --- .../cmorizers/data/cmor_config/JRA-55.yml | 64 ++++++++ esmvaltool/cmorizers/data/datasets.yml | 2 +- .../data/downloaders/datasets/jra_55.py | 17 +- .../data/formatters/datasets/jra_55.py | 154 
++++++++++++++++++ esmvaltool/references/jra_55.bibtex | 10 ++ 5 files changed, 238 insertions(+), 9 deletions(-) create mode 100644 esmvaltool/cmorizers/data/cmor_config/JRA-55.yml create mode 100644 esmvaltool/cmorizers/data/formatters/datasets/jra_55.py create mode 100644 esmvaltool/references/jra_55.bibtex diff --git a/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml b/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml new file mode 100644 index 0000000000..951d33f2f7 --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml @@ -0,0 +1,64 @@ +--- +# Common global attributes for Cmorizer output +attributes: + dataset_id: JRA-55 + version: '1' + tier: 2 + modeling_realm: reanaly + project_id: OBS6 + source: 'https://rda.ucar.edu/datasets/ds628.1/' + reference: 'jra_55' + comment: | + '' + +# Variables to cmorize +variables: +# cli: +# short_name: cli +# mip: Amon +# file: fcst_p125.229_ciwc.*.grb + +# clivi: +# short_name: clivi +# mip: Amon +# file: fcst_column125.058_cice.*.grb + +# clw: +# short_name: clw +# mip: Amon +# file: fcst_p125.228_clwc.*.grb + +# clwvi: +# short_name: clwvi +# mip: Amon +# operator: sum +# files: +# - 'fcst_column125.058_cice.*.grb' +# - 'fcst_column125.227_cw.*.grb' + +# clt: +# short_name: clt +# mip: Amon +# file: fcst_surf125.071_tcdc.*.grb + +# prw: +# short_name: prw +# mip: Amon +# file: fcst_column125.054_pwat.*.grb + +# rsuscs: +# short_name: rsuscs +# mip: Amon +# start_element: 0 +# file: fcst_phy2m125.160_csusf.*.grb + + rsutcs: + short_name: rsutcs + mip: Amon + start_element: 1 + file: fcst_phy2m125.160_csusf.*.grb + + tas: + short_name: tas + mip: Amon + file: anl_surf125.011_tmp.*.grb diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index a19258be26..4a38259f77 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -768,7 +768,7 @@ datasets: JRA-55: tier: 2 source: https://rda.ucar.edu/datasets/ds628.1/ - last_access: 
2023-03-17 + last_access: 2023-03-22 info: | Create an account on the research data archive (RDA) in order to be able to download the data (1.25 degree, pressure levels). See diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py b/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py index 1ca6e916d9..a03e0b2f72 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py @@ -75,26 +75,27 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, # define variables to download - var = {"039_vvel": "anl_p125", - "011_tmp": "anl_surf125", + var = {"011_tmp": "anl_surf125", + "039_vvel": "anl_p125", + "071_tcdc": "fcst_surf125"} "054_pwat": "fcst_column125", "058_cice": "fcst_column125", - "227_cw": "fcst_column125", - "228_clwc": "fcst_p125", - "229_ciwc": "fcst_p125", "160_csusf": "fcst_phy2m125", "161_csdsf": "fcst_phy2m125", "204_dswrf": "fcst_phy2m125", "211_uswrf": "fcst_phy2m125", - "212_ulwrf": "fcst_phy2m125"} + "212_ulwrf": "fcst_phy2m125", + "227_cw": "fcst_column125", + "228_clwc": "fcst_p125", + "229_ciwc": "fcst_p125"} # download data while loop_date <= end_date: year = loop_date.year - for var, channel in var.items(): - fname = f"{channel}.{var}.{year}01_{year}12" + for varname, channel in var.items(): + fname = f"{channel}.{varname}.{year}01_{year}12" # download file downloader.download_file(url + f"/{channel}/{year}/" + fname, download_options) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py b/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py new file mode 100644 index 0000000000..1319e7994e --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py @@ -0,0 +1,154 @@ +""" +ESMValTool CMORizer for JRA-55 data. + +Tier + Tier 2: other freely-available dataset. 
+ +Source + Research Data Archive (RDA): + https://rda.ucar.edu/datasets/ds628.1/ + +Last access + 20230322 + +Download and processing instructions + see download script cmorizers/data/downloaders/datasets/jra_55.py +""" + +import copy +import glob +import logging +import os + +import iris +import numpy as np + +from datetime import datetime +from iris_grib.message import GribMessage +from cf_units import Unit + +from esmvaltool.cmorizers.data import utilities as utils + +logger = logging.getLogger(__name__) + + +def _load_jra55_grib(filenames, var): + """Load data from GRIB file and return list of cubes.""" + cubelist = [] + for infile in filenames: + tmp_cubes = iris.load(infile) + if len(tmp_cubes) > 1: + start_element = var.get('start_element', 0) + i = 0 + # create list of files (needed in case infile contains wildcards) + listing = glob.glob(infile) + for fname in listing: + for message in GribMessage.messages_from_filename(fname): + day = message.sections[1]['day'] + month = message.sections[1]['month'] + year = ((message.sections[1]['centuryOfReferenceTimeOfData'] + - 1) * 100 + message.sections[1]['yearOfCentury']) + + point = datetime(year=year, month=month, day=day) + time_units = Unit('days since 1950-01-01 00:00:00', + calendar='standard') + time_coord = iris.coords.DimCoord( + time_units.date2num(point), + var_name='time', + standard_name='time', + long_name='time', + units=time_units) + + tmp_cubes[i].add_aux_coord(time_coord) + tmp_cubes[i].remove_coord('originating_centre') + + i = i + 1 + + # message.sections[1]['indicatorOfTypeOfLevel'] always gives + # 'sfc', so a distinction between "surface" and "top of the + # atmosphere" is not possible. Instead, we simply use every + # second cube as "surface" and "top of the atmosphere" are + # alternating in the GRIB file with "surface" being element + # 'start_element' read from config file + # (esmvaltool/cmorizers/data/cmor_config/JRA-55.yml). 
+ + cubelist.append(tmp_cubes[start_element::2].merge_cube()) + else: + #tmp_cubes.remove_coord('originating_centre') + cubelist = tmp_cubes + + return(cubelist) + + +def _extract_variable(short_name, var, in_files, cfg, in_dir, + out_dir): + """Extract variable.""" + # load data (returns a list of cubes) + + cubes = _load_jra55_grib(in_files, var) + + # apply operators (if any) + + if len(cubes) > 1: + if var.get('operator', '') == 'sum': + # Multiple variables case using sum operation + cube = None + for in_cube in cubes: + if cube is None: + cube = in_cube + else: + cube += in_cube + elif var.get('operator', '') == 'diff': + # two variables case using diff operation + if len(cubes) != 2: + errmsg = (f'operator diff selected for variable {short_name} ' + f'expects exactly two input variables and two input ' + f'files') + raise ValueError(errmsg) + cube = cubes[0] + cube -= cubes[1] + else: + oper = var.get('operator') + raise ValueError( + f'multiple input files found for variable {short_name} ' + f'with unknown operator {oper}') + else: + cube = cubes[0] + + print(cube) + print(cubes) + + # Fix metadata + cmor_info = cfg['cmor_table'].get_variable(var['mip'], short_name) + attrs = copy.deepcopy(cfg['attributes']) + attrs['mip'] = var['mip'] + utils.fix_var_metadata(cube, cmor_info) + utils.set_global_atts(cube, attrs) + + # Save variable + utils.save_variable(cube, + short_name, + out_dir, + attrs, + unlimited_dimensions=['time']) + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorization func call.""" + # Run the cmorization + for (short_name, var) in cfg['variables'].items(): + short_name = var['short_name'] + + if 'file' in var: + filename = [os.path.join(in_dir, var['file'])] + elif 'files' in var: + filename = [] + for file in var['files']: + filename.append(os.path.join(in_dir, file)) + else: + raise ValueError(f"No input file(s) specified for variable " + f"{short_name}.") + + logger.info("CMORizing variable '%s' from 
file '%s'", short_name, + filename) + _extract_variable(short_name, var, filename, cfg, in_dir, out_dir) diff --git a/esmvaltool/references/jra_55.bibtex b/esmvaltool/references/jra_55.bibtex new file mode 100644 index 0000000000..d979a6c9cc --- /dev/null +++ b/esmvaltool/references/jra_55.bibtex @@ -0,0 +1,10 @@ +@article{jra_55, + doi = {https://doi.org/10.5065/D60G3H5B}, + title={The JRA-55 Reanalysis: General Specifications and Basic Characteristics}, + author={Kobayashi, S. and Y. Ota and Y. Harada and A. Ebita and M. Moriya and H. Onoda and K. Onogi and H. Kamahori and C. Kobayashi and H. Endo and K. Miyaoka and K. Takahashi}, + journal={J. Met. Soc. Jap.}, + volume={93}, + number={1}, + pages={5-48}, + year={2015} +} From 15a848d48be058635f0179603e6b39b16c19c661 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Tue, 28 Mar 2023 07:43:57 +0200 Subject: [PATCH 05/16] first complete version --- doc/sphinx/source/input.rst | 2 + .../cmorizers/data/cmor_config/JRA-55.yml | 98 +++++++++++++------ .../data/downloaders/datasets/jra_55.py | 5 +- .../data/formatters/datasets/jra_55.py | 73 +++++++++----- .../recipes/examples/recipe_check_obs.yml | 23 +++++ 5 files changed, 141 insertions(+), 60 deletions(-) diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index 2a0e9c2956..3723a827f8 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -341,6 +341,8 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | JRA-25 | clt, hus, prw, rlut, rlutcs, rsut, rsutcs (Amon) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ +| JRA-55 | cli, clivi, clw, clwvi, clt, prw, rlus, rlut, rlutcs, rsus, rsuscs, rsut, rsutcs, tas, wap 
(Amon) | 2 | Python | ++------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | Kadow2020 | tasa (Amon) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | LAI3g | lai (Lmon) | 3 | Python | diff --git a/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml b/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml index 951d33f2f7..17300e1f82 100644 --- a/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml +++ b/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml @@ -13,44 +13,73 @@ attributes: # Variables to cmorize variables: -# cli: -# short_name: cli -# mip: Amon -# file: fcst_p125.229_ciwc.*.grb + cli: + short_name: cli + mip: Amon + file: fcst_p125.229_ciwc.*.grb + + clivi: + short_name: clivi + mip: Amon + file: fcst_column125.058_cice.*.grb + + clw: + short_name: clw + mip: Amon + file: fcst_p125.228_clwc.*.grb + + clwvi: + short_name: clwvi + mip: Amon + operator: sum + files: + - 'fcst_column125.058_cice.*.grb' + - 'fcst_column125.227_cw.*.grb' + + clt: + short_name: clt + mip: Amon + file: fcst_surf125.071_tcdc.*.grb + + prw: + short_name: prw + mip: Amon + file: fcst_column125.054_pwat.*.grb -# clivi: -# short_name: clivi -# mip: Amon -# file: fcst_column125.058_cice.*.grb + rlus: + short_name: rlus + mip: Amon + start_element: 0 + file: fcst_phy2m125.212_ulwrf.*.grb -# clw: -# short_name: clw -# mip: Amon -# file: fcst_p125.228_clwc.*.grb + rlut: + short_name: rlut + mip: Amon + start_element: 1 + file: fcst_phy2m125.212_ulwrf.*.grb -# clwvi: -# short_name: clwvi -# mip: Amon -# operator: sum -# files: -# - 'fcst_column125.058_cice.*.grb' -# - 'fcst_column125.227_cw.*.grb' + rlutcs: + short_name: rlutcs + mip: Amon + file: fcst_phy2m125.162_csulf.*.grb -# clt: -# short_name: clt -# mip: Amon -# file: fcst_surf125.071_tcdc.*.grb + rsus: + 
short_name: rsus + mip: Amon + start_element: 0 + file: fcst_phy2m125.211_uswrf.*.grb -# prw: -# short_name: prw -# mip: Amon -# file: fcst_column125.054_pwat.*.grb + rsuscs: + short_name: rsuscs + mip: Amon + start_element: 0 + file: fcst_phy2m125.160_csusf.*.grb -# rsuscs: -# short_name: rsuscs -# mip: Amon -# start_element: 0 -# file: fcst_phy2m125.160_csusf.*.grb + rsut: + short_name: rsut + mip: Amon + start_element: 1 + file: fcst_phy2m125.211_uswrf.*.grb rsutcs: short_name: rsutcs @@ -62,3 +91,8 @@ variables: short_name: tas mip: Amon file: anl_surf125.011_tmp.*.grb + + wap: + short_name: wap + mip: Amon + file: anl_p125.039_vvel.*.grb diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py b/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py index a03e0b2f72..4794544c2b 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py @@ -77,12 +77,11 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, var = {"011_tmp": "anl_surf125", "039_vvel": "anl_p125", - "071_tcdc": "fcst_surf125"} + "071_tcdc": "fcst_surf125", "054_pwat": "fcst_column125", "058_cice": "fcst_column125", "160_csusf": "fcst_phy2m125", - "161_csdsf": "fcst_phy2m125", - "204_dswrf": "fcst_phy2m125", + "162_csulf": "fcst_phy2m125", "211_uswrf": "fcst_phy2m125", "212_ulwrf": "fcst_phy2m125", "227_cw": "fcst_column125", diff --git a/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py b/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py index 1319e7994e..43b36ed29c 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py @@ -20,13 +20,12 @@ import logging import os -import iris -import numpy as np - from datetime import datetime from iris_grib.message import GribMessage from cf_units import Unit +import iris + from esmvaltool.cmorizers.data import utilities as utils logger = 
logging.getLogger(__name__) @@ -38,20 +37,23 @@ def _load_jra55_grib(filenames, var): for infile in filenames: tmp_cubes = iris.load(infile) if len(tmp_cubes) > 1: - start_element = var.get('start_element', 0) + start_element = var.get('start_element') i = 0 - # create list of files (needed in case infile contains wildcards) - listing = glob.glob(infile) + # create list of files (needed in case 'infile' contains wildcards) + # note: list has to be sorted by year (i.e. filename) to be + # compatible with the output of iris.load() + listing = sorted(glob.glob(infile), key=os.path.basename) for fname in listing: for message in GribMessage.messages_from_filename(fname): day = message.sections[1]['day'] month = message.sections[1]['month'] - year = ((message.sections[1]['centuryOfReferenceTimeOfData'] + year = ((message.sections[1][ + 'centuryOfReferenceTimeOfData'] - 1) * 100 + message.sections[1]['yearOfCentury']) point = datetime(year=year, month=month, day=day) time_units = Unit('days since 1950-01-01 00:00:00', - calendar='standard') + calendar='standard') time_coord = iris.coords.DimCoord( time_units.date2num(point), var_name='time', @@ -64,29 +66,37 @@ def _load_jra55_grib(filenames, var): i = i + 1 + # Some JRA-55 GRIB files contain two fields: "surface" and + # "top of the atmosphere". As # message.sections[1]['indicatorOfTypeOfLevel'] always gives - # 'sfc', so a distinction between "surface" and "top of the - # atmosphere" is not possible. Instead, we simply use every - # second cube as "surface" and "top of the atmosphere" are - # alternating in the GRIB file with "surface" being element - # 'start_element' read from config file - # (esmvaltool/cmorizers/data/cmor_config/JRA-55.yml). - - cubelist.append(tmp_cubes[start_element::2].merge_cube()) + # 'sfc', a distinction between "surface" and "top of the + # atmosphere" is not possible (bug?). 
+ # If "start_element" is given in the JRA-55 CMOR config file + # (esmvaltool/cmorizers/data/cmor_config/JRA-55.yml), we simply + # extract every second cube as "surface" and "top of the + # atmosphere" from the list of cube as the fields are alternating + # in the GRIB file. + # If "first element" is not specified in the JRA-55 CMOR config + # file, no selection of cubes is done before merging into a + # single cube. + + if start_element != None: + cubelist.append(tmp_cubes[start_element::2].merge_cube()) + else: + cubelist.append(tmp_cubes.merge_cube()) else: - #tmp_cubes.remove_coord('originating_centre') + tmp_cubes[0].remove_coord('originating_centre') cubelist = tmp_cubes - return(cubelist) + return cubelist -def _extract_variable(short_name, var, in_files, cfg, in_dir, - out_dir): +def _extract_variable(short_name, var, in_files, cfg, out_dir): """Extract variable.""" # load data (returns a list of cubes) cubes = _load_jra55_grib(in_files, var) - + # apply operators (if any) if len(cubes) > 1: @@ -115,14 +125,27 @@ def _extract_variable(short_name, var, in_files, cfg, in_dir, else: cube = cubes[0] - print(cube) - print(cubes) - # Fix metadata cmor_info = cfg['cmor_table'].get_variable(var['mip'], short_name) attrs = copy.deepcopy(cfg['attributes']) attrs['mip'] = var['mip'] utils.fix_var_metadata(cube, cmor_info) + + # fix z-coordinate (if present) + + for coord in cube.dim_coords: + coord_type = iris.util.guess_coord_axis(coord) + if coord_type == 'Z': + coord.standard_name = 'air_pressure' + coord.long_name = 'pressure' + coord.var_name = 'plev' + coord.attributes['positive'] = 'down' + if coord.units == "hPa": + coord.convert_units('Pa') + utils.flip_dim_coord(cube, coord.standard_name) + + utils.fix_dim_coordnames(cube) + utils.fix_coords(cube) utils.set_global_atts(cube, attrs) # Save variable @@ -151,4 +174,4 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): logger.info("CMORizing variable '%s' from file '%s'", short_name, 
filename) - _extract_variable(short_name, var, filename, cfg, in_dir, out_dir) + _extract_variable(short_name, var, filename, cfg, out_dir) diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index 9e75d0d9ef..4228d64f84 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -502,6 +502,29 @@ diagnostics: type: reanaly, version: 1, start_year: 1979, end_year: 2013} scripts: null + JRA-55: + description: JRA-55 check + variables: + cli: + clivi: + clw: + clwvi: + clt: + prw: + rlus: + rlut: + rlutcs: + rsus: + rsuscs: + rsut: + rsutcs: + tas: + wap: + additional_datasets: + - {dataset: JRA-55, project: OBS6, mip: Amon, tier: 2, + type: reanaly, version: 1, start_year: 1958, end_year: 2022} + scripts: null + Kadow2020: description: Kadow2020 check variables: From 6b7aa5be54d116d7e8c59318276d7f78560b7f27 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Tue, 28 Mar 2023 07:52:23 +0200 Subject: [PATCH 06/16] fixed flake issues --- .../cmorizers/data/formatters/datasets/jra_55.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py b/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py index 43b36ed29c..a1bb33c6c0 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py @@ -73,14 +73,14 @@ def _load_jra55_grib(filenames, var): # atmosphere" is not possible (bug?). # If "start_element" is given in the JRA-55 CMOR config file # (esmvaltool/cmorizers/data/cmor_config/JRA-55.yml), we simply - # extract every second cube as "surface" and "top of the - # atmosphere" from the list of cube as the fields are alternating - # in the GRIB file. 
- # If "first element" is not specified in the JRA-55 CMOR config + # extract every second cube from the list of cube starting + # with field "start_element" as the fields "surface" and "top of + # the atmosphere" are alternating in the GRIB file. + # If "start_element" is not specified in the JRA-55 CMOR config # file, no selection of cubes is done before merging into a # single cube. - - if start_element != None: + + if start_element is not None: cubelist.append(tmp_cubes[start_element::2].merge_cube()) else: cubelist.append(tmp_cubes.merge_cube()) From eacb84ce1d6a80687fbb32c1265cb3251af7c98c Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Tue, 28 Mar 2023 08:07:58 +0200 Subject: [PATCH 07/16] added iris-grib to environment*.yml --- environment.yml | 1 + environment_osx.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/environment.yml b/environment.yml index 147b20945d..c5d30adb0f 100644 --- a/environment.yml +++ b/environment.yml @@ -29,6 +29,7 @@ dependencies: - gdal - iris>=3.1.0 - iris-esmf-regrid + - iris-grib - jinja2 - joblib - lime diff --git a/environment_osx.yml b/environment_osx.yml index 38553fd43c..81fa97d36c 100644 --- a/environment_osx.yml +++ b/environment_osx.yml @@ -29,6 +29,7 @@ dependencies: - gdal - iris>=3.1.0 - iris-esmf-regrid + - iris-grib - jinja2 - joblib - lime From e401a51f446dbee1fe60e16348d54fc749b46974 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Tue, 28 Mar 2023 09:58:00 +0200 Subject: [PATCH 08/16] added iris-grib to setup.py --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 3d07e755f3..250cae9ca7 100755 --- a/setup.py +++ b/setup.py @@ -35,6 +35,7 @@ 'esmf-regrid', 'fiona', 'GDAL', + 'iris-grib', 'jinja2', 'joblib', 'lime', From 67f6cf1c3a69dfef96243abcf72ba6de4a84da25 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Tue, 18 Apr 2023 14:56:42 +0200 Subject: [PATCH 09/16] added variable ta to JRA-55 CMORizer --- doc/sphinx/source/input.rst | 2 +- 
esmvaltool/cmorizers/data/cmor_config/JRA-55.yml | 5 +++++ esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py | 3 ++- esmvaltool/recipes/examples/recipe_check_obs.yml | 1 + 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index 3723a827f8..c5325626a8 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -341,7 +341,7 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | JRA-25 | clt, hus, prw, rlut, rlutcs, rsut, rsutcs (Amon) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ -| JRA-55 | cli, clivi, clw, clwvi, clt, prw, rlus, rlut, rlutcs, rsus, rsuscs, rsut, rsutcs, tas, wap (Amon) | 2 | Python | +| JRA-55 | cli, clivi, clw, clwvi, clt, prw, rlus, rlut, rlutcs, rsus, rsuscs, rsut, rsutcs, ta, tas, wap (Amon)| 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | Kadow2020 | tasa (Amon) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ diff --git a/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml b/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml index 17300e1f82..67607c5759 100644 --- a/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml +++ b/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml @@ -87,6 +87,11 @@ variables: start_element: 1 file: fcst_phy2m125.160_csusf.*.grb + ta: + short_name: ta + mip: Amon + file: anl_p125.011_tmp.*.grb + tas: short_name: tas mip: Amon diff --git 
a/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py b/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py index 4794544c2b..4e361c86a5 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py @@ -75,7 +75,8 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, # define variables to download - var = {"011_tmp": "anl_surf125", + var = {"011_tmp": "anl_p125"}, + "011_tmp": "anl_surf125", "039_vvel": "anl_p125", "071_tcdc": "fcst_surf125", "054_pwat": "fcst_column125", diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index 4228d64f84..0c69908631 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -518,6 +518,7 @@ diagnostics: rsuscs: rsut: rsutcs: + ta: tas: wap: additional_datasets: From 28189676e829e8991490d6e6f3f17b9757ee863a Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Thu, 1 Jun 2023 14:45:24 +0200 Subject: [PATCH 10/16] fixed issues in downloader --- .../data/downloaders/datasets/jra_55.py | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py b/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py index 4e361c86a5..61490ebfff 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py @@ -75,26 +75,28 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, # define variables to download - var = {"011_tmp": "anl_p125"}, - "011_tmp": "anl_surf125", - "039_vvel": "anl_p125", - "071_tcdc": "fcst_surf125", - "054_pwat": "fcst_column125", - "058_cice": "fcst_column125", - "160_csusf": "fcst_phy2m125", - "162_csulf": "fcst_phy2m125", - "211_uswrf": "fcst_phy2m125", - "212_ulwrf": "fcst_phy2m125", - "227_cw": "fcst_column125", - "228_clwc": 
"fcst_p125", - "229_ciwc": "fcst_p125"} + var = [["011_tmp", "anl_p125"], + ["011_tmp", "anl_surf125"], + ["039_vvel", "anl_p125"], + ["071_tcdc", "fcst_surf125"], + ["054_pwat", "fcst_column125"], + ["058_cice", "fcst_column125"], + ["160_csusf", "fcst_phy2m125"], + ["162_csulf", "fcst_phy2m125"], + ["211_uswrf", "fcst_phy2m125"], + ["212_ulwrf", "fcst_phy2m125"], + ["227_cw", "fcst_column125"], + ["228_clwc", "fcst_p125"], + ["229_ciwc", "fcst_p125"]] # download data while loop_date <= end_date: year = loop_date.year - for varname, channel in var.items(): + for item in var: + varname = item[0] + channel = item[1] fname = f"{channel}.{varname}.{year}01_{year}12" # download file downloader.download_file(url + f"/{channel}/{year}/" + From c85c45d13d53d4b335236f3b0e14effb1239f539 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Thu, 17 Aug 2023 13:51:03 +0200 Subject: [PATCH 11/16] updated download path (changed by RDA) --- esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py b/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py index 61490ebfff..a5dc5b851c 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py @@ -70,7 +70,7 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date, # download files - url = "https://rda.ucar.edu/data/ds628.1" + url = "https://data.rda.ucar.edu/ds628.1" download_options = ["--load-cookies=auth.rda_ucar_edu"] # define variables to download From a0393836ada3c8fa2e6c41346fd1c0e8639b3c49 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Mon, 25 Sep 2023 15:01:42 +0200 Subject: [PATCH 12/16] Update esmvaltool/cmorizers/data/formatters/datasets/jra_55.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Rémi Kazeroni ---
esmvaltool/cmorizers/data/formatters/datasets/jra_55.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py b/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py index 9d4eca1a48..efa7535700 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py @@ -47,9 +47,11 @@ def _load_jra55_grib(filenames, var): for message in GribMessage.messages_from_filename(fname): day = message.sections[1]['day'] month = message.sections[1]['month'] - year = ((message.sections[1][ + year = ( + (message.sections[1][ 'centuryOfReferenceTimeOfData'] - - 1) * 100 + message.sections[1]['yearOfCentury']) + - 1) * 100 + message.sections[1]['yearOfCentury'] + ) point = datetime(year=year, month=month, day=day) time_units = Unit('days since 1950-01-01 00:00:00', From 8d33497fc0ca64963053a7384e6e295c1bc1dbd0 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Tue, 26 Sep 2023 06:36:57 +0200 Subject: [PATCH 13/16] JRA-55 formatter: added support for start/end (year) --- .../cmorizers/data/cmor_config/JRA-55.yml | 34 +++++++++--------- .../data/formatters/datasets/jra_55.py | 36 +++++++++++-------- 2 files changed, 39 insertions(+), 31 deletions(-) diff --git a/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml b/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml index 67607c5759..f0aa6d2f26 100644 --- a/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml +++ b/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml @@ -16,88 +16,88 @@ variables: cli: short_name: cli mip: Amon - file: fcst_p125.229_ciwc.*.grb + file: fcst_p125.229_ciwc.{year}01_{year}12.grb clivi: short_name: clivi mip: Amon - file: fcst_column125.058_cice.*.grb + file: fcst_column125.058_cice.{year}01_{year}12.grb clw: short_name: clw mip: Amon - file: fcst_p125.228_clwc.*.grb + file: fcst_p125.228_clwc.{year}01_{year}12.grb clwvi: short_name: clwvi mip: Amon operator: sum files: - - 
'fcst_column125.058_cice.*.grb' - - 'fcst_column125.227_cw.*.grb' + - 'fcst_column125.058_cice.{year}01_{year}12.grb' + - 'fcst_column125.227_cw.{year}01_{year}12.grb' clt: short_name: clt mip: Amon - file: fcst_surf125.071_tcdc.*.grb + file: fcst_surf125.071_tcdc.{year}01_{year}12.grb prw: short_name: prw mip: Amon - file: fcst_column125.054_pwat.*.grb + file: fcst_column125.054_pwat.{year}01_{year}12.grb rlus: short_name: rlus mip: Amon start_element: 0 - file: fcst_phy2m125.212_ulwrf.*.grb + file: fcst_phy2m125.212_ulwrf.{year}01_{year}12.grb rlut: short_name: rlut mip: Amon start_element: 1 - file: fcst_phy2m125.212_ulwrf.*.grb + file: fcst_phy2m125.212_ulwrf.{year}01_{year}12.grb rlutcs: short_name: rlutcs mip: Amon - file: fcst_phy2m125.162_csulf.*.grb + file: fcst_phy2m125.162_csulf.{year}01_{year}12.grb rsus: short_name: rsus mip: Amon start_element: 0 - file: fcst_phy2m125.211_uswrf.*.grb + file: fcst_phy2m125.211_uswrf.{year}01_{year}12.grb rsuscs: short_name: rsuscs mip: Amon start_element: 0 - file: fcst_phy2m125.160_csusf.*.grb + file: fcst_phy2m125.160_csusf.{year}01_{year}12.grb rsut: short_name: rsut mip: Amon start_element: 1 - file: fcst_phy2m125.211_uswrf.*.grb + file: fcst_phy2m125.211_uswrf.{year}01_{year}12.grb rsutcs: short_name: rsutcs mip: Amon start_element: 1 - file: fcst_phy2m125.160_csusf.*.grb + file: fcst_phy2m125.160_csusf.{year}01_{year}12.grb ta: short_name: ta mip: Amon - file: anl_p125.011_tmp.*.grb + file: anl_p125.011_tmp.{year}01_{year}12.grb tas: short_name: tas mip: Amon - file: anl_surf125.011_tmp.*.grb + file: anl_surf125.011_tmp.{year}01_{year}12.grb wap: short_name: wap mip: Amon - file: anl_p125.039_vvel.*.grb + file: anl_p125.039_vvel.{year}01_{year}12.grb diff --git a/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py b/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py index efa7535700..e1bd4659f4 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py +++ 
b/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py @@ -161,19 +161,27 @@ def _extract_variable(short_name, var, in_files, cfg, out_dir): def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization func call.""" # Run the cmorization - for (short_name, var) in cfg['variables'].items(): - short_name = var['short_name'] - - if 'file' in var: - filename = [os.path.join(in_dir, var['file'])] - elif 'files' in var: + if start_date is None: + start_date = 1958 + else: + start_date = start_date.year + if end_date is None: + end_date = 2022 + else: + end_date = end_date.year + for year in range(start_date, end_date + 1): + for (short_name, var) in cfg['variables'].items(): + short_name = var['short_name'] filename = [] - for file in var['files']: - filename.append(os.path.join(in_dir, file)) - else: - raise ValueError(f"No input file(s) specified for variable " - f"{short_name}.") + if 'file' in var: + filename.append(os.path.join(in_dir, var['file'].format(year=year))) + elif 'files' in var: + for file in var['files']: + filename.append(os.path.join(in_dir, file.format(year=year))) + else: + raise ValueError(f"No input file(s) specified for variable " + f"{short_name}.") - logger.info("CMORizing variable '%s' from file '%s'", short_name, - filename) - _extract_variable(short_name, var, filename, cfg, out_dir) + logger.info("CMORizing variable '%s' from file '%s'", short_name, + filename) + _extract_variable(short_name, var, filename, cfg, out_dir) From 332de2369c40b4f3691857905aae161c320f88b4 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Tue, 26 Sep 2023 06:57:00 +0200 Subject: [PATCH 14/16] JRA-55 formatter: simplified code for operator diff --- esmvaltool/cmorizers/data/formatters/datasets/jra_55.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py b/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py index e1bd4659f4..6818871cf7 100644 --- 
a/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py @@ -66,6 +66,8 @@ def _load_jra55_grib(filenames, var): tmp_cubes[i].add_aux_coord(time_coord) tmp_cubes[i].remove_coord('originating_centre') + print(message.sections[1]['indicatorOfTypeOfLevel']) + i = i + 1 # Some JRA-55 GRIB files contain two fields: "surface" and @@ -117,8 +119,7 @@ def _extract_variable(short_name, var, in_files, cfg, out_dir): f'expects exactly two input variables and two input ' f'files') raise ValueError(errmsg) - cube = cubes[0] - cube -= cubes[1] + cube = cubes[0] - cubes[1] else: oper = var.get('operator') raise ValueError( From 76fff773e7608a6c4e670d9d3400f09ca1d1ac32 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Wed, 27 Sep 2023 13:41:02 +0200 Subject: [PATCH 15/16] JRA-55 formatter: switched from iris-grib to xarray --- environment.yml | 2 +- environment_osx.yml | 2 +- .../cmorizers/data/cmor_config/JRA-55.yml | 12 +- .../data/formatters/datasets/jra_55.py | 138 +++++++++--------- setup.py | 2 +- 5 files changed, 74 insertions(+), 82 deletions(-) diff --git a/environment.yml b/environment.yml index e62330dc58..be78716bd2 100644 --- a/environment.yml +++ b/environment.yml @@ -16,6 +16,7 @@ dependencies: - cdo >=1.9.7 - cdsapi - cf-units + - cfgrib - cftime - cmocean - cython @@ -30,7 +31,6 @@ dependencies: - gdal - iris >=3.6.0 - iris-esmf-regrid >=0.7.0 - - iris-grib - jinja2 - joblib - lime diff --git a/environment_osx.yml b/environment_osx.yml index 1a2a575e7e..d767113193 100644 --- a/environment_osx.yml +++ b/environment_osx.yml @@ -16,6 +16,7 @@ dependencies: - cdo >=1.9.7 - cdsapi - cf-units + - cfgrib - cftime - cmocean - cython @@ -30,7 +31,6 @@ dependencies: - gdal - iris >=3.6.0 - iris-esmf-regrid >=0.7.0 - - iris-grib - jinja2 - joblib - lime diff --git a/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml b/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml index f0aa6d2f26..a4f4c8b379 100644 --- 
a/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml +++ b/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml @@ -49,13 +49,13 @@ variables: rlus: short_name: rlus mip: Amon - start_element: 0 + typeOfLevel: surface file: fcst_phy2m125.212_ulwrf.{year}01_{year}12.grb rlut: short_name: rlut mip: Amon - start_element: 1 + typeOfLevel: nominalTop file: fcst_phy2m125.212_ulwrf.{year}01_{year}12.grb rlutcs: @@ -66,25 +66,25 @@ variables: rsus: short_name: rsus mip: Amon - start_element: 0 + typeOfLevel: surface file: fcst_phy2m125.211_uswrf.{year}01_{year}12.grb rsuscs: short_name: rsuscs mip: Amon - start_element: 0 + typeOfLevel: surface file: fcst_phy2m125.160_csusf.{year}01_{year}12.grb rsut: short_name: rsut mip: Amon - start_element: 1 + typeOfLevel: nominalTop file: fcst_phy2m125.211_uswrf.{year}01_{year}12.grb rsutcs: short_name: rsutcs mip: Amon - start_element: 1 + typeOfLevel: nominalTop file: fcst_phy2m125.160_csusf.{year}01_{year}12.grb ta: diff --git a/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py b/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py index 6818871cf7..6819e67bdb 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py @@ -16,12 +16,10 @@ """ import copy -import glob import logging import os +import xarray as xr -from datetime import datetime -from iris_grib.message import GribMessage from cf_units import Unit import iris @@ -33,64 +31,39 @@ def _load_jra55_grib(filenames, var): """Load data from GRIB file and return list of cubes.""" + leveltype = var.get('typeOfLevel') cubelist = [] - for infile in filenames: - tmp_cubes = iris.load(infile) - if len(tmp_cubes) > 1: - start_element = var.get('start_element') - i = 0 - # create list of files (needed in case 'infile' contains wildcards) - # note: list has to be sorted by year (i.e. 
filename) to be - # compatible with the output of iris.load() - listing = sorted(glob.glob(infile), key=os.path.basename) - for fname in listing: - for message in GribMessage.messages_from_filename(fname): - day = message.sections[1]['day'] - month = message.sections[1]['month'] - year = ( - (message.sections[1][ - 'centuryOfReferenceTimeOfData'] - - 1) * 100 + message.sections[1]['yearOfCentury'] - ) - - point = datetime(year=year, month=month, day=day) - time_units = Unit('days since 1950-01-01 00:00:00', - calendar='standard') - time_coord = iris.coords.DimCoord( - time_units.date2num(point), - var_name='time', - standard_name='time', - long_name='time', - units=time_units) - - tmp_cubes[i].add_aux_coord(time_coord) - tmp_cubes[i].remove_coord('originating_centre') - - print(message.sections[1]['indicatorOfTypeOfLevel']) - - i = i + 1 - - # Some JRA-55 GRIB files contain two fields: "surface" and - # "top of the atmosphere". As - # message.sections[1]['indicatorOfTypeOfLevel'] always gives - # 'sfc', a distinction between "surface" and "top of the - # atmosphere" is not possible (bug?). - # If "start_element" is given in the JRA-55 CMOR config file - # (esmvaltool/cmorizers/data/cmor_config/JRA-55.yml), we simply - # extract every second cube from the list of cube starting - # with field "start_element" as the fields "surface" and "top of - # the atmosphere" are alternating in the GRIB file. - # If "start_element" is not specified in the JRA-55 CMOR config - # file, no selection of cubes is done before merging into a - # single cube. 
- - if start_element is not None: - cubelist.append(tmp_cubes[start_element::2].merge_cube()) - else: - cubelist.append(tmp_cubes.merge_cube()) - else: - tmp_cubes[0].remove_coord('originating_centre') - cubelist = tmp_cubes + if leveltype is not None: + dataset = xr.open_mfdataset(filenames, engine="cfgrib", + filter_by_keys={'typeOfLevel': leveltype}) + else: + dataset = xr.open_mfdataset(filenames, engine="cfgrib") + varnames = list(dataset.data_vars) + for varname in varnames: + da_tmp = dataset[varname] + # conversion to Iris cubes requires a valid standard_name + da_tmp.attrs['standard_name'] = var['standard_name'] + cube = da_tmp.to_iris() + # remove auxiliary coordinate 'time' + cube.remove_coord('time') + # rename coordinate from 'forecast_reference_time' to 'time' + timecoord = cube.dim_coords[0] + timecoord.rename("time") + # convert unit string to cf_unit object + # (calendar (calendar=coord.units.calendar) must be ignored + # or conversion fails) + timecoord.units = Unit(timecoord.units) + # add forecast period to time coordinate to get the actual time + # for which the data are valid + forecast = cube.coord('forecast_period') # forecast period in hours + timecoord.points = timecoord.points + forecast.points * 3600 + # remove unneeded scalar variables to prevent warnings + auxcoordnames = ['step', 'entireAtmosphere', 'number', 'isobaricLayer', + 'surface', 'nominalTop', 'heightAboveGround'] + for aux_coord in cube.coords(dim_coords=False): + if aux_coord.var_name in auxcoordnames: + cube.remove_coord(aux_coord) + cubelist.append(cube) return cubelist @@ -98,7 +71,8 @@ def _load_jra55_grib(filenames, var): def _extract_variable(short_name, var, in_files, cfg, out_dir): """Extract variable.""" # load data (returns a list of cubes) - + cmor_info = cfg['cmor_table'].get_variable(var['mip'], short_name) + var['standard_name'] = cmor_info.standard_name cubes = _load_jra55_grib(in_files, var) # apply operators (if any) @@ -129,15 +103,26 @@ def
_extract_variable(short_name, var, in_files, cfg, out_dir): cube = cubes[0] # Fix metadata - cmor_info = cfg['cmor_table'].get_variable(var['mip'], short_name) attrs = copy.deepcopy(cfg['attributes']) attrs['mip'] = var['mip'] utils.fix_var_metadata(cube, cmor_info) - # fix z-coordinate (if present) + if cube.var_name in ['hfls', 'hfss', 'rlus', 'rlut', 'rlutcs', 'rsus', + 'rsuscs', 'rsut', 'rsutcs']: + attrs['positive'] = 'up' + + if cube.var_name in ['rlds', 'rldscs', 'rsds', 'rsdscs', 'rsdt', 'rtmt', + 'tauu', 'tauv']: + attrs['positive'] = 'down' + + # fix longitudes and z-coordinate (if present) for coord in cube.dim_coords: coord_type = iris.util.guess_coord_axis(coord) + if coord_type == 'X': + # -> shift longitude coordinate by one grid box + # to match obs4mips/CREATE-IP grid + coord.points = coord.points + 360 / len(coord.points) if coord_type == 'Z': coord.standard_name = 'air_pressure' coord.long_name = 'pressure' @@ -148,7 +133,11 @@ def _extract_variable(short_name, var, in_files, cfg, out_dir): utils.flip_dim_coord(cube, coord.standard_name) utils.fix_dim_coordnames(cube) + utils.fix_coords(cube) + if 'height2m' in cmor_info.dimensions: + utils.add_height2m(cube) + utils.set_global_atts(cube, attrs) # Save variable @@ -156,7 +145,8 @@ def _extract_variable(short_name, var, in_files, cfg, out_dir): short_name, out_dir, attrs, - unlimited_dimensions=['time']) + unlimited_dimensions=['time'], + local_keys=['positive']) def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): @@ -170,19 +160,21 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): end_date = 2022 else: end_date = end_date.year - for year in range(start_date, end_date + 1): - for (short_name, var) in cfg['variables'].items(): - short_name = var['short_name'] - filename = [] + for (short_name, var) in cfg['variables'].items(): + short_name = var['short_name'] + filename = [] + for year in range(start_date, end_date + 1): if 'file' in var: - 
filename.append(os.path.join(in_dir, var['file'].format(year=year))) + filename.append(os.path.join(in_dir, + var['file'].format(year=year))) elif 'files' in var: for file in var['files']: - filename.append(os.path.join(in_dir, file.format(year=year))) + filename.append(os.path.join(in_dir, + file.format(year=year))) else: raise ValueError(f"No input file(s) specified for variable " f"{short_name}.") - logger.info("CMORizing variable '%s' from file '%s'", short_name, - filename) - _extract_variable(short_name, var, filename, cfg, out_dir) + logger.info("CMORizing variable '%s' from file '%s'", short_name, + filename) + _extract_variable(short_name, var, filename, cfg, out_dir) diff --git a/setup.py b/setup.py index 37c4dabd3e..834da8a70b 100755 --- a/setup.py +++ b/setup.py @@ -25,6 +25,7 @@ 'cdo', 'cdsapi', 'cf-units', + 'cfgrib', 'cftime', 'cmocean', 'dask', @@ -36,7 +37,6 @@ 'esmf-regrid>=0.7.0', 'fiona', 'GDAL', - 'iris-grib', 'jinja2', 'joblib', 'lime', From bb1a30785ece30a01f572601ca15fe6617e3d940 Mon Sep 17 00:00:00 2001 From: Axel Lauer Date: Thu, 28 Sep 2023 06:38:46 +0200 Subject: [PATCH 16/16] removed blank line(s) --- esmvaltool/cmorizers/data/formatters/datasets/jra_55.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py b/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py index 6819e67bdb..16125d4c2f 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py @@ -76,7 +76,6 @@ def _extract_variable(short_name, var, in_files, cfg, out_dir): cubes = _load_jra55_grib(in_files, var) # apply operators (if any) - if len(cubes) > 1: if var.get('operator', '') == 'sum': # Multiple variables case using sum operation @@ -116,7 +115,6 @@ def _extract_variable(short_name, var, in_files, cfg, out_dir): attrs['positive'] = 'down' # fix longitudes and z-coordinate (if present) - for coord in cube.dim_coords: coord_type = 
iris.util.guess_coord_axis(coord) if coord_type == 'X': @@ -133,11 +131,9 @@ def _extract_variable(short_name, var, in_files, cfg, out_dir): utils.flip_dim_coord(cube, coord.standard_name) utils.fix_dim_coordnames(cube) - utils.fix_coords(cube) if 'height2m' in cmor_info.dimensions: utils.add_height2m(cube) - utils.set_global_atts(cube, attrs) # Save variable