diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst
index a72bce73aa..13779bd4a9 100644
--- a/doc/sphinx/source/input.rst
+++ b/doc/sphinx/source/input.rst
@@ -352,6 +352,8 @@ A list of the datasets for which a CMORizers is available is provided in the fol
 +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
 | JRA-25                       | clt, hus, prw, rlut, rlutcs, rsut, rsutcs (Amon)                                                     | 2    | Python          |
 +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
+| JRA-55                       | cli, clivi, clw, clwvi, clt, prw, rlus, rlut, rlutcs, rsus, rsuscs, rsut, rsutcs, ta, tas, wap (Amon)| 2    | Python          |
++------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
 | Kadow2020                    | tasa (Amon)                                                                                          | 2    | Python          |
 +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
 | LAI3g                        | lai (Lmon)                                                                                           | 3    | Python          |
diff --git a/environment.yml b/environment.yml
index f61304d5f8..34f567214a 100644
--- a/environment.yml
+++ b/environment.yml
@@ -14,6 +14,7 @@ dependencies:
   - cdo >=1.9.7
   - cdsapi
   - cf-units
+  - cfgrib
   - cftime
   - cmocean
   - cython
diff --git a/environment_osx.yml b/environment_osx.yml
index 7287c76c92..5593c7afe0 100644
--- a/environment_osx.yml
+++ b/environment_osx.yml
@@ -14,6 +14,7 @@ dependencies:
   - cdo >=1.9.7
   - cdsapi
   - cf-units
+  - cfgrib
   - cftime
   - cmocean
   - cython
diff --git a/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml b/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml
new file mode 100644
index 0000000000..a4f4c8b379
--- /dev/null
+++ b/esmvaltool/cmorizers/data/cmor_config/JRA-55.yml
@@ -0,0 +1,102 @@
+---
+# Common global attributes for Cmorizer output
+attributes:
+  dataset_id: JRA-55
+  version: '1'
+  tier: 2
+  modeling_realm: reanaly
+  project_id: OBS6
+  source: 'https://rda.ucar.edu/datasets/ds628.1/'
+  reference: 'jra_55'
+  comment: ''
+
+# Variables to cmorize
+variables:
+  cli:
+    short_name: cli
+    mip: Amon
+    file: fcst_p125.229_ciwc.{year}01_{year}12.grb
+
+  clivi:
+    short_name: clivi
+    mip: Amon
+    file: fcst_column125.058_cice.{year}01_{year}12.grb
+
+  clw:
+    short_name: clw
+    mip: Amon
+    file: fcst_p125.228_clwc.{year}01_{year}12.grb
+
+  clwvi:
+    short_name: clwvi
+    mip: Amon
+    operator: sum
+    files:
+      - 'fcst_column125.058_cice.{year}01_{year}12.grb'
+      - 'fcst_column125.227_cw.{year}01_{year}12.grb'
+
+  clt:
+    short_name: clt
+    mip: Amon
+    file: fcst_surf125.071_tcdc.{year}01_{year}12.grb
+
+  prw:
+    short_name: prw
+    mip: Amon
+    file: fcst_column125.054_pwat.{year}01_{year}12.grb
+
+  rlus:
+    short_name: rlus
+    mip: Amon
+    typeOfLevel: surface
+    file: fcst_phy2m125.212_ulwrf.{year}01_{year}12.grb
+
+  rlut:
+    short_name: rlut
+    mip: Amon
+    typeOfLevel: nominalTop
+    file: fcst_phy2m125.212_ulwrf.{year}01_{year}12.grb
+
+  rlutcs:
+    short_name: rlutcs
+    mip: Amon
+    file: fcst_phy2m125.162_csulf.{year}01_{year}12.grb
+
+  rsus:
+    short_name: rsus
+    mip: Amon
+    typeOfLevel: surface
+    file: fcst_phy2m125.211_uswrf.{year}01_{year}12.grb
+
+  rsuscs:
+    short_name: rsuscs
+    mip: Amon
+    typeOfLevel: surface
+    file: fcst_phy2m125.160_csusf.{year}01_{year}12.grb
+
+  rsut:
+    short_name: rsut
+    mip: Amon
+    typeOfLevel: nominalTop
+    file: fcst_phy2m125.211_uswrf.{year}01_{year}12.grb
+
+  rsutcs:
+    short_name: rsutcs
+    mip: Amon
+    typeOfLevel: nominalTop
+    file: fcst_phy2m125.160_csusf.{year}01_{year}12.grb
+
+  ta:
+    short_name: ta
+    mip: Amon
+    file: anl_p125.011_tmp.{year}01_{year}12.grb
+
+  tas:
+    short_name: tas
+    mip: Amon
+    file: anl_surf125.011_tmp.{year}01_{year}12.grb
+
+  wap:
+    short_name: wap
+    mip: Amon
+    file: anl_p125.039_vvel.{year}01_{year}12.grb
diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml
index 757fce7d67..6ed10a4340 100644
--- a/esmvaltool/cmorizers/data/datasets.yml
+++ b/esmvaltool/cmorizers/data/datasets.yml
@@ -791,6 +791,15 @@ datasets:
       mon/atmos/rsut/rsut_Amon_reanalysis_JRA-25_197901-201312.nc
       mon/atmos/rsutcs/rsutcs_Amon_reanalysis_JRA-25_197901-201312.nc
 
+  JRA-55:
+    tier: 2
+    source: https://rda.ucar.edu/datasets/ds628.1/
+    last_access: 2023-03-22
+    info: |
+      Create an account on the research data archive (RDA) in order to be able
+      to download the data (1.25 degree, pressure levels). See
+      https://rda.ucar.edu/login/register/ for more details.
+
   Kadow2020:
     tier: 2
     source: http://users.met.fu-berlin.de/~ChristopherKadow/
diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py b/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py
new file mode 100644
index 0000000000..a5dc5b851c
--- /dev/null
+++ b/esmvaltool/cmorizers/data/downloaders/datasets/jra_55.py
@@ -0,0 +1,132 @@
+"""Script to download JRA-55 from RDA."""
+import logging
+import os
+
+from datetime import datetime
+from getpass import getpass
+from urllib.parse import urlencode
+
+from dateutil import relativedelta
+
+from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader
+
+
+logger = logging.getLogger(__name__)
+
+# temporary files written by wget while logging in
+AUTH_LOG = "Authentication.log"
+AUTH_COOKIES = "auth.rda_ucar_edu"
+
+
+def download_dataset(config, dataset, dataset_info, start_date, end_date,
+                     overwrite):
+    """Download dataset.
+
+    The RDA credentials are read from the environment variables
+    ``rda-user`` and ``rda-passwd``; missing values are asked for
+    interactively.
+
+    Parameters
+    ----------
+    config : dict
+        ESMValTool's user configuration
+    dataset : str
+        Name of the dataset
+    dataset_info : dict
+        Dataset information from the datasets.yml file
+    start_date : datetime
+        Start of the interval to download
+    end_date : datetime
+        End of the interval to download
+    overwrite : bool
+        Overwrite already downloaded files
+
+    Raises
+    ------
+    ValueError
+        If no RDA user name is given.
+    """
+    downloader = WGetDownloader(
+        config=config,
+        dataset=dataset,
+        dataset_info=dataset_info,
+        overwrite=overwrite,
+    )
+
+    os.makedirs(downloader.local_folder, exist_ok=True)
+
+    user = os.environ.get("rda-user")
+    if user is None:
+        user = str(input("RDA user name? "))
+    if user == "":
+        errmsg = ("A RDA account is required to download JRA-55 data."
+                  " Please visit https://rda.ucar.edu/login/register/"
+                  " to create an account at the Research Data Archive"
+                  " (RDA) if needed.")
+        logger.error(errmsg)
+        raise ValueError(errmsg)
+
+    passwd = os.environ.get("rda-passwd")
+    if passwd is None:
+        # getpass does not echo the password on the terminal
+        passwd = getpass("RDA password? ")
+
+    if start_date is None:
+        start_date = datetime(1958, 1, 1)
+    if end_date is None:
+        end_date = datetime(2022, 12, 31)
+    # Files hold one calendar year each: start on 1 January so that the
+    # last year is not skipped when start_date lies later in the year
+    # than end_date.
+    loop_date = datetime(start_date.year, 1, 1)
+
+    # wget is run without a shell, so the form data must not be wrapped in
+    # quotes; urlencode escapes special characters in the credentials.
+    post_data = urlencode({"email": user, "passwd": passwd,
+                           "action": "login"})
+    options = ["-O", AUTH_LOG, f"--save-cookies={AUTH_COOKIES}",
+               f"--post-data={post_data}"]
+
+    url = "https://data.rda.ucar.edu/ds628.1"
+    download_options = [f"--load-cookies={AUTH_COOKIES}"]
+
+    # define variables to download
+
+    var = [["011_tmp", "anl_p125"],
+           ["011_tmp", "anl_surf125"],
+           ["039_vvel", "anl_p125"],
+           ["071_tcdc", "fcst_surf125"],
+           ["054_pwat", "fcst_column125"],
+           ["058_cice", "fcst_column125"],
+           ["160_csusf", "fcst_phy2m125"],
+           ["162_csulf", "fcst_phy2m125"],
+           ["211_uswrf", "fcst_phy2m125"],
+           ["212_ulwrf", "fcst_phy2m125"],
+           ["227_cw", "fcst_column125"],
+           ["228_clwc", "fcst_p125"],
+           ["229_ciwc", "fcst_p125"]]
+
+    try:
+        # login to Research Data Archive (RDA)
+        downloader.login("https://rda.ucar.edu/cgi-bin/login", options)
+
+        # download data
+        while loop_date <= end_date:
+            year = loop_date.year
+
+            for varname, channel in var:
+                fname = f"{channel}.{varname}.{year}01_{year}12"
+                # download file
+                downloader.download_file(f"{url}/{channel}/{year}/{fname}",
+                                         download_options)
+                # add file extension ".grb"
+                local_file = os.path.join(downloader.local_folder, fname)
+                os.rename(local_file, local_file + ".grb")
+
+            loop_date += relativedelta.relativedelta(years=1)
+    finally:
+        # always remove the temporary files: the cookie jar holds the
+        # session credentials
+        for tmp_file in (AUTH_LOG, AUTH_COOKIES):
+            if os.path.exists(tmp_file):
+                os.remove(tmp_file)
diff --git a/esmvaltool/cmorizers/data/downloaders/wget.py b/esmvaltool/cmorizers/data/downloaders/wget.py
index 8544e1d727..2afcca1d5a 100644
--- a/esmvaltool/cmorizers/data/downloaders/wget.py
+++ b/esmvaltool/cmorizers/data/downloaders/wget.py
@@ -54,6 +54,21 @@ def download_file(self, server_path, wget_options):
         logger.debug(command)
         subprocess.check_output(command)
 
+    def login(self, server_path, wget_options):
+        """Log in to a remote server with wget.
+
+        Parameters
+        ----------
+        server_path: str
+            URL of the login page
+        wget_options: list(str)
+            Extra options for wget
+        """
+        command = ['wget'] + wget_options + [server_path]
+        # the options may carry credentials: keep them out of the log
+        logger.debug("Running wget login request for %s", server_path)
+        subprocess.check_output(command)
+
     @property
     def overwrite_options(self):
         """Get overwrite options as configured in downloader."""
diff --git a/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py b/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py
new file mode 100644
index 0000000000..16125d4c2f
--- /dev/null
+++ b/esmvaltool/cmorizers/data/formatters/datasets/jra_55.py
@@ -0,0 +1,194 @@
+"""
+ESMValTool CMORizer for JRA-55 data.
+
+Tier
+    Tier 2: other freely-available dataset.
+
+Source
+    Research Data Archive (RDA):
+    https://rda.ucar.edu/datasets/ds628.1/
+
+Last access
+    20230322
+
+Download and processing instructions
+    see download script cmorizers/data/downloaders/datasets/jra_55.py
+"""
+
+import copy
+import logging
+import os
+import xarray as xr
+
+from cf_units import Unit
+
+import iris
+
+from esmvaltool.cmorizers.data import utilities as utils
+
+logger = logging.getLogger(__name__)
+
+
+def _load_jra55_grib(filenames, var):
+    """Load data from GRIB file(s) and return list of cubes.
+
+    Parameters
+    ----------
+    filenames : list of str
+        GRIB files that are opened as one dataset.
+    var : dict
+        Variable settings; 'typeOfLevel' (optional) selects the GRIB level
+        type to read, 'standard_name' is assigned to every cube.
+
+    Returns
+    -------
+    list
+        One iris cube for each data variable found in the files.
+    """
+    leveltype = var.get('typeOfLevel')
+    cubelist = []
+    if leveltype is not None:
+        dataset = xr.open_mfdataset(filenames, engine="cfgrib",
+                                    filter_by_keys={'typeOfLevel': leveltype})
+    else:
+        dataset = xr.open_mfdataset(filenames, engine="cfgrib")
+    varnames = list(dataset.data_vars)
+    for varname in varnames:
+        da_tmp = dataset[varname]
+        # conversion to Iris cubes requires a valid standard_name
+        da_tmp.attrs['standard_name'] = var['standard_name']
+        cube = da_tmp.to_iris()
+        # remove auxiliary coordinate 'time'
+        cube.remove_coord('time')
+        # rename coordinate from 'forecast_reference_time' to 'time'
+        timecoord = cube.dim_coords[0]
+        timecoord.rename("time")
+        # convert unit string to cf_unit object
+        # (the calendar (calendar=coord.units.calendar) must be ignored
+        # or the conversion fails)
+        timecoord.units = Unit(timecoord.units)
+        # add forecast period to time coordinate to get the actual time
+        # for which the data are valid
+        # NOTE(review): assumes the time axis is in seconds - to be confirmed
+        forecast = cube.coord('forecast_period')  # forecast period in hours
+        timecoord.points = timecoord.points + forecast.points * 3600
+        # remove unneeded scalar variables to prevent warnings
+        auxcoordnames = ['step', 'entireAtmosphere', 'number', 'isobaricLayer',
+                         'surface', 'nominalTop', 'heightAboveGround']
+        for aux_coord in cube.coords(dim_coords=False):
+            if aux_coord.var_name in auxcoordnames:
+                cube.remove_coord(aux_coord)
+        cubelist.append(cube)
+
+    return cubelist
+
+
+def _extract_variable(short_name, var, in_files, cfg, out_dir):
+    """Extract variable.
+
+    Load the input files of one variable, combine the loaded fields with
+    the configured operator (if any), fix metadata and coordinates and
+    save the result.
+
+    Raises
+    ------
+    ValueError
+        If no data could be loaded, or if several fields were loaded and
+        the operator is unknown or used with a wrong number of fields.
+    """
+    # load data (returns a list of cubes)
+    cmor_info = cfg['cmor_table'].get_variable(var['mip'], short_name)
+    var['standard_name'] = cmor_info.standard_name
+    cubes = _load_jra55_grib(in_files, var)
+    if not cubes:
+        raise ValueError(
+            f'no data found for variable {short_name} in {in_files}')
+
+    # apply operators (if any)
+    if len(cubes) > 1:
+        oper = var.get('operator')
+        if oper == 'sum':
+            # Multiple variables case using sum operation
+            cube = cubes[0]
+            for in_cube in cubes[1:]:
+                cube += in_cube
+        elif oper == 'diff':
+            # two variables case using diff operation
+            if len(cubes) != 2:
+                errmsg = (f'operator diff selected for variable {short_name} '
+                          f'expects exactly two input variables and two input '
+                          f'files')
+                raise ValueError(errmsg)
+            cube = cubes[0] - cubes[1]
+        else:
+            raise ValueError(
+                f'multiple input files found for variable {short_name} '
+                f'with unknown operator {oper}')
+    else:
+        cube = cubes[0]
+
+    # Fix metadata
+    attrs = copy.deepcopy(cfg['attributes'])
+    attrs['mip'] = var['mip']
+    utils.fix_var_metadata(cube, cmor_info)
+
+    if cube.var_name in ['hfls', 'hfss', 'rlus', 'rlut', 'rlutcs', 'rsus',
+                         'rsuscs', 'rsut', 'rsutcs']:
+        attrs['positive'] = 'up'
+
+    if cube.var_name in ['rlds', 'rldscs', 'rsds', 'rsdscs', 'rsdt', 'rtmt',
+                         'tauu', 'tauv']:
+        attrs['positive'] = 'down'
+
+    # fix longitudes and z-coordinate (if present)
+    for coord in cube.dim_coords:
+        coord_type = iris.util.guess_coord_axis(coord)
+        if coord_type == 'X':
+            # -> shift longitude coordinate by one grid box
+            # to match obs4mips/CREATE-IP grid
+            coord.points = coord.points + 360 / len(coord.points)
+        if coord_type == 'Z':
+            coord.standard_name = 'air_pressure'
+            coord.long_name = 'pressure'
+            coord.var_name = 'plev'
+            coord.attributes['positive'] = 'down'
+            if coord.units == "hPa":
+                coord.convert_units('Pa')
+            utils.flip_dim_coord(cube, coord.standard_name)
+
+    utils.fix_dim_coordnames(cube)
+    utils.fix_coords(cube)
+    if 'height2m' in cmor_info.dimensions:
+        utils.add_height2m(cube)
+    utils.set_global_atts(cube, attrs)
+
+    # Save variable
+    utils.save_variable(cube,
+                        short_name,
+                        out_dir,
+                        attrs,
+                        unlimited_dimensions=['time'],
+                        local_keys=['positive'])
+
+
+def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date):
+    """Cmorization func call."""
+    # Run the cmorization
+    start_year = 1958 if start_date is None else start_date.year
+    end_year = 2022 if end_date is None else end_date.year
+    for var in cfg['variables'].values():
+        short_name = var['short_name']
+        if 'file' in var:
+            templates = [var['file']]
+        elif 'files' in var:
+            templates = var['files']
+        else:
+            raise ValueError(f"No input file(s) specified for variable "
+                             f"{short_name}.")
+        filenames = [os.path.join(in_dir, template.format(year=year))
+                     for year in range(start_year, end_year + 1)
+                     for template in templates]
+
+        logger.info("CMORizing variable '%s' from file '%s'", short_name,
+                    filenames)
+        _extract_variable(short_name, var, filenames, cfg, out_dir)
diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml
index 70368c87dc..820ceddbd0 100644
--- a/esmvaltool/recipes/examples/recipe_check_obs.yml
+++ b/esmvaltool/recipes/examples/recipe_check_obs.yml
@@ -521,6 +521,30 @@ diagnostics:
          type: reanaly, version: 1, start_year: 1979, end_year: 2013}
     scripts: null
 
+  JRA-55:
+    description: JRA-55 check
+    variables:
+      cli:
+      clivi:
+      clw:
+      clwvi:
+      clt:
+      prw:
+      rlus:
+      rlut:
+      rlutcs:
+      rsus:
+      rsuscs:
+      rsut:
+      rsutcs:
+      ta:
+      tas:
+      wap:
+    additional_datasets:
+      - {dataset: JRA-55, project: OBS6, mip: Amon, tier: 2,
+         type: reanaly, version: 1, start_year: 1958, end_year: 2022}
+    scripts: null
+
   Kadow2020:
     description: Kadow2020 check
     variables:
diff --git a/esmvaltool/references/jra_55.bibtex b/esmvaltool/references/jra_55.bibtex
new file mode 100644
index 0000000000..d979a6c9cc
--- /dev/null
+++ b/esmvaltool/references/jra_55.bibtex
@@ -0,0 +1,10 @@
+@article{jra_55,
+    doi = {https://doi.org/10.5065/D60G3H5B},
+    title={The JRA-55 Reanalysis: General Specifications and Basic Characteristics},
+    author={Kobayashi, S. and Y. Ota and Y. Harada and A. Ebita and M. Moriya and H. Onoda and K. Onogi and H. Kamahori and C. Kobayashi and H. Endo and K. Miyaoka and K. Takahashi},
+    journal={J. Met. Soc. Jap.},
+    volume={93},
+    number={1},
+    pages={5-48},
+    year={2015}
+}
diff --git a/setup.py b/setup.py
index c86dc4118e..5a4d8bc62c 100755
--- a/setup.py
+++ b/setup.py
@@ -25,6 +25,7 @@
         'cdo',
         'cdsapi',
         'cf-units',
+        'cfgrib',
         'cftime',
         'cmocean',
         'dask',