Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Obs adaptor fixes copco 1812 #244

Merged
merged 4 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cads_adaptors/adaptors/cadsobs/adaptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ def _retrieve(self, request):
service_definition = cadsobs_client.get_service_definition(dataset_name)
field_attributes = cdm_lite_variables_dict["attributes"]
global_attributes = service_definition["global_attributes"]
# Get licences from the config passed to the adaptor
global_attributes.update(
dict(licence_list=self.get_licences(self.mapped_request))
)
self.context.debug(
f"The following objects are going to be filtered: {object_urls}"
)
Expand Down
41 changes: 36 additions & 5 deletions cads_adaptors/adaptors/cadsobs/csv.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import zipfile
from pathlib import Path

import xarray
Expand Down Expand Up @@ -38,22 +39,22 @@ def get_csv_header(
########################################################################################
# This file contains data retrieved from the CDS https://cds.climate.copernicus.eu/cdsapp#!/dataset/{dataset}
# This is a C3S product under the following licences:
# - licence-to-use-copernicus-products
# - woudc-data-policy
{licence_list}
# This is a CSV file following the CDS convention cdm-obs
# Data source: {dataset_source}
# Version:
# Time extent: {time_start} - {time_end}
# Geographic area (minlat/maxlat/minlon/maxlon): {area}
# Variables selected and units
# Variables selected and units:
{varstr}
# Uncertainty legend:
{uncertainty_str}
########################################################################################
"""
if "latitude|station_configuration" in cdm_lite_dataset:
coord_table = "station_configuration"
else:
coord_table = "header_table"
area = "{}/{}/{}/{}".format(
area = "{:.2f}/{:.2f}/{:.2f}/{:.2f}".format(
cdm_lite_dataset[f"latitude|{coord_table}"].min().compute().item(),
cdm_lite_dataset[f"latitude|{coord_table}"].max().compute().item(),
cdm_lite_dataset[f"longitude|{coord_table}"].min().compute().item(),
Expand All @@ -75,13 +76,43 @@ def get_csv_header(
.itertuples(index=False, name=None)
)
varstr = "\n".join([f"# {v} [{u}]" for v, u in vars_and_units])
# Uncertainty documentation
uncertainty_vars = [
str(v) for v in cdm_lite_dataset.data_vars if "uncertainty_value" in str(v)
]
if len(uncertainty_vars) > 0:
unc_vars_and_names = [
(u, cdm_lite_dataset[u].long_name) for u in uncertainty_vars
]
uncertainty_str = "\n".join([f"# {u} {n}" for u, n in unc_vars_and_names])
else:
uncertainty_str = "No uncertainty columns available for this dataset."
# List of licences
licence_list_str = "\n".join(
f"# {licence}" for licence in cdm_lite_dataset.attrs["licence_list"]
)
# Render the header
header_params = dict(
dataset=retrieve_args.dataset,
dataset_source=retrieve_args.params.dataset_source,
area=area,
time_start=time_start,
time_end=time_end,
varstr=varstr,
uncertainty_str=uncertainty_str,
licence_list=licence_list_str,
)
header = template.format(**header_params)
return header


def to_zip(input_file_path: Path) -> Path:
    """Compress the given file into a .zip archive next to it.

    The archive is written alongside the input file, with the same stem and
    a ``.zip`` suffix (e.g. ``result.csv`` -> ``result.zip``). The archive
    contains a single entry named after the input file's basename.

    Parameters
    ----------
    input_file_path:
        Path of the file to compress. Must exist and be readable.

    Returns
    -------
    Path
        Path of the newly created zip archive.
    """
    # Determine output zip path (same directory, same stem, .zip suffix).
    output_zip_path = input_file_path.with_suffix(".zip")

    # Create zip archive. ZIP_DEFLATED is required: ZipFile's default is
    # ZIP_STORED (no compression), which would leave the archive as large
    # as the original file and defeat the purpose of zipping the CSV.
    with zipfile.ZipFile(
        output_zip_path, "w", compression=zipfile.ZIP_DEFLATED
    ) as zipf:
        zipf.write(input_file_path, arcname=input_file_path.name)

    return output_zip_path
6 changes: 4 additions & 2 deletions cads_adaptors/adaptors/cadsobs/retrieve.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import fsspec

from cads_adaptors import Context
from cads_adaptors.adaptors.cadsobs.csv import to_csv
from cads_adaptors.adaptors.cadsobs.csv import to_csv, to_zip
from cads_adaptors.adaptors.cadsobs.models import RetrieveArgs, RetrieveParams
from cads_adaptors.adaptors.cadsobs.utils import (
_add_attributes,
Expand Down Expand Up @@ -63,8 +63,10 @@ def retrieve_data(
else:
try:
with dask.config.set(scheduler="single-threaded"):
output_path = to_csv(output_dir, output_path_netcdf, retrieve_args)
output_path_csv = to_csv(output_dir, output_path_netcdf, retrieve_args)
output_path = to_zip(output_path_csv)
finally:
# Ensure that the netCDF is not left behind taking disk space.
output_path_netcdf.unlink()
output_path_csv.unlink()
return output_path
10 changes: 5 additions & 5 deletions tests/test_cadsobs_adaptor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import time
import zipfile
from pathlib import Path
from unittest.mock import Mock

Expand Down Expand Up @@ -198,6 +199,7 @@ def _send_request(self, endpoint, method, payload):
"rename": {"time_aggregation": "dataset_source", "variable": "variables"},
"force": {},
},
"licences": ["licence-to-use-copernicus-products", "uscrn-data-policy"],
}


Expand Down Expand Up @@ -229,11 +231,9 @@ def test_adaptor_csv(tmp_path, monkeypatch):
test_request_csv = TEST_REQUEST.copy()
test_request_csv["format"] = "csv"
result = adaptor.retrieve(test_request_csv)
tempfile = Path(tmp_path, "test_adaptor.csv")
with tempfile.open("wb") as tmpf:
tmpf.write(result.read())
assert tempfile.stat().st_size > 0
file_lines = tempfile.read_text().split("\n")
with zipfile.ZipFile(result, "r") as zipf:
file_lines = zipf.read(name=zipf.namelist()[0]).decode("UTF-8").split("\n")
assert len(file_lines) > 0
assert "# daily_maximum_air_temperature [K]" in file_lines
assert "# daily_maximum_relative_humidity [%]" in file_lines

Expand Down
Loading