diff --git a/cads_adaptors/adaptors/cadsobs/adaptor.py b/cads_adaptors/adaptors/cadsobs/adaptor.py
index e3bf440c..ab29a428 100644
--- a/cads_adaptors/adaptors/cadsobs/adaptor.py
+++ b/cads_adaptors/adaptors/cadsobs/adaptor.py
@@ -62,6 +62,10 @@ def _retrieve(self, request):
         service_definition = cadsobs_client.get_service_definition(dataset_name)
         field_attributes = cdm_lite_variables_dict["attributes"]
         global_attributes = service_definition["global_attributes"]
+        # Get licences from the config passed to the adaptor
+        global_attributes.update(
+            dict(licence_list=self.get_licences(self.mapped_request))
+        )
         self.context.debug(
             f"The following objects are going to be filtered: {object_urls}"
         )
diff --git a/cads_adaptors/adaptors/cadsobs/csv.py b/cads_adaptors/adaptors/cadsobs/csv.py
index e92b1caf..32bffbe6 100644
--- a/cads_adaptors/adaptors/cadsobs/csv.py
+++ b/cads_adaptors/adaptors/cadsobs/csv.py
@@ -1,4 +1,5 @@
 import logging
+import zipfile
 from pathlib import Path
 
 import xarray
@@ -38,22 +39,22 @@ def get_csv_header(
 ########################################################################################
 # This file contains data retrieved from the CDS https://cds.climate.copernicus.eu/cdsapp#!/dataset/{dataset}
 # This is a C3S product under the following licences:
-# - licence-to-use-copernicus-products
-# - woudc-data-policy
+{licence_list}
 # This is a CSV file following the CDS convention cdm-obs
 # Data source: {dataset_source}
-# Version:
 # Time extent: {time_start} - {time_end}
 # Geographic area (minlat/maxlat/minlon/maxlon): {area}
-# Variables selected and units
+# Variables selected and units:
 {varstr}
+# Uncertainty legend:
+{uncertainty_str}
 ########################################################################################
 """
     if "latitude|station_configuration" in cdm_lite_dataset:
         coord_table = "station_configuration"
     else:
         coord_table = "header_table"
-    area = "{}/{}/{}/{}".format(
+    area = "{:.2f}/{:.2f}/{:.2f}/{:.2f}".format(
         cdm_lite_dataset[f"latitude|{coord_table}"].min().compute().item(),
         cdm_lite_dataset[f"latitude|{coord_table}"].max().compute().item(),
         cdm_lite_dataset[f"longitude|{coord_table}"].min().compute().item(),
@@ -75,6 +76,22 @@ def get_csv_header(
         .itertuples(index=False, name=None)
     )
     varstr = "\n".join([f"# {v} [{u}]" for v, u in vars_and_units])
+    # Uncertainty documentation
+    uncertainty_vars = [
+        str(v) for v in cdm_lite_dataset.data_vars if "uncertainty_value" in str(v)
+    ]
+    if len(uncertainty_vars) > 0:
+        unc_vars_and_names = [
+            (u, cdm_lite_dataset[u].long_name) for u in uncertainty_vars
+        ]
+        uncertainty_str = "\n".join([f"# {u} {n}" for u, n in unc_vars_and_names])
+    else:
+        uncertainty_str = "No uncertainty columns available for this dataset."
+    # List of licences
+    licence_list_str = "\n".join(
+        f"# {licence}" for licence in cdm_lite_dataset.attrs["licence_list"]
+    )
+    # Render the header
     header_params = dict(
         dataset=retrieve_args.dataset,
         dataset_source=retrieve_args.params.dataset_source,
@@ -82,6 +99,20 @@
         time_start=time_start,
         time_end=time_end,
         varstr=varstr,
+        uncertainty_str=uncertainty_str,
+        licence_list=licence_list_str,
     )
     header = template.format(**header_params)
     return header
+
+
+def to_zip(input_file_path: Path) -> Path:
+    """Zips the given file into a .zip archive."""
+    # Determine output zip path
+    output_zip_path = input_file_path.with_suffix(".zip")
+
+    # Create zip archive
+    with zipfile.ZipFile(output_zip_path, "w") as zipf:
+        zipf.write(input_file_path, arcname=input_file_path.name)
+
+    return output_zip_path
diff --git a/cads_adaptors/adaptors/cadsobs/retrieve.py b/cads_adaptors/adaptors/cadsobs/retrieve.py
index e28c7dfa..033f982a 100644
--- a/cads_adaptors/adaptors/cadsobs/retrieve.py
+++ b/cads_adaptors/adaptors/cadsobs/retrieve.py
@@ -4,7 +4,7 @@
 import fsspec
 
 from cads_adaptors import Context
-from cads_adaptors.adaptors.cadsobs.csv import to_csv
+from cads_adaptors.adaptors.cadsobs.csv import to_csv, to_zip
 from cads_adaptors.adaptors.cadsobs.models import RetrieveArgs, RetrieveParams
 from cads_adaptors.adaptors.cadsobs.utils import (
     _add_attributes,
@@ -63,8 +63,10 @@ def retrieve_data(
     else:
         try:
             with dask.config.set(scheduler="single-threaded"):
-                output_path = to_csv(output_dir, output_path_netcdf, retrieve_args)
+                output_path_csv = to_csv(output_dir, output_path_netcdf, retrieve_args)
+                output_path = to_zip(output_path_csv)
         finally:
             # Ensure that the netCDF is not left behind taking disk space.
             output_path_netcdf.unlink()
+            output_path_csv.unlink()
     return output_path
diff --git a/tests/test_cadsobs_adaptor.py b/tests/test_cadsobs_adaptor.py
index 7ffbcd35..35c34267 100644
--- a/tests/test_cadsobs_adaptor.py
+++ b/tests/test_cadsobs_adaptor.py
@@ -1,4 +1,5 @@
 import time
+import zipfile
 from pathlib import Path
 from unittest.mock import Mock
 
@@ -198,6 +199,7 @@ def _send_request(self, endpoint, method, payload):
         "rename": {"time_aggregation": "dataset_source", "variable": "variables"},
         "force": {},
     },
+    "licences": ["licence-to-use-copernicus-products", "uscrn-data-policy"],
 }
 
 
@@ -229,11 +231,9 @@ def test_adaptor_csv(tmp_path, monkeypatch):
     test_request_csv = TEST_REQUEST.copy()
     test_request_csv["format"] = "csv"
     result = adaptor.retrieve(test_request_csv)
-    tempfile = Path(tmp_path, "test_adaptor.csv")
-    with tempfile.open("wb") as tmpf:
-        tmpf.write(result.read())
-    assert tempfile.stat().st_size > 0
-    file_lines = tempfile.read_text().split("\n")
+    with zipfile.ZipFile(result, "r") as zipf:
+        file_lines = zipf.read(name=zipf.namelist()[0]).decode("UTF-8").split("\n")
+    assert len(file_lines) > 0
     assert "# daily_maximum_air_temperature [K]" in file_lines
     assert "# daily_maximum_relative_humidity [%]" in file_lines
 
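
For reference, a minimal sketch (not part of the patch) of how the new licence_list and uncertainty_str placeholders render in the CSV header. The licence names mirror the test config above, the fallback string is the one set in get_csv_header, and the template fragment is abridged from csv.py:

# Minimal rendering sketch; abridged template, sample values taken from the tests.
licences = ["licence-to-use-copernicus-products", "uscrn-data-policy"]
# Same join as in get_csv_header: one "# "-prefixed line per licence.
licence_list_str = "\n".join(f"# {licence}" for licence in licences)
# Fallback used when the dataset has no uncertainty columns.
uncertainty_str = "No uncertainty columns available for this dataset."

header = (
    "# This is a C3S product under the following licences:\n"
    f"{licence_list_str}\n"
    "# Uncertainty legend:\n"
    f"{uncertainty_str}\n"
)
print(header)

The zipped CSV produced by to_zip is what retrieve_data now returns, which is why test_adaptor_csv opens the result with zipfile.ZipFile instead of writing it to a temporary file and reading it back as plain text.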