Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Obs adaptor fixes copco 1812 #244

Merged
merged 4 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cads_adaptors/adaptors/cadsobs/adaptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ def _retrieve(self, request):
service_definition = cadsobs_client.get_service_definition(dataset_name)
field_attributes = cdm_lite_variables_dict["attributes"]
global_attributes = service_definition["global_attributes"]
# Get licences from the config passed to the adaptor
global_attributes.update(
dict(licence_list=self.get_licences(self.mapped_request))
)
self.context.debug(
f"The following objects are going to be filtered: {object_urls}"
)
Expand Down
41 changes: 36 additions & 5 deletions cads_adaptors/adaptors/cadsobs/csv.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import zipfile
from pathlib import Path

import xarray
Expand Down Expand Up @@ -38,22 +39,22 @@ def get_csv_header(
########################################################################################
# This file contains data retrieved from the CDS https://cds.climate.copernicus.eu/cdsapp#!/dataset/{dataset}
# This is a C3S product under the following licences:
# - licence-to-use-copernicus-products
# - woudc-data-policy
{licence_list}
# This is a CSV file following the CDS convention cdm-obs
# Data source: {dataset_source}
# Version:
# Time extent: {time_start} - {time_end}
# Geographic area (minlat/maxlat/minlon/maxlon): {area}
# Variables selected and units
# Variables selected and units:
{varstr}
# Uncertainty legend:
{uncertainty_str}
########################################################################################
"""
if "latitude|station_configuration" in cdm_lite_dataset:
coord_table = "station_configuration"
else:
coord_table = "header_table"
area = "{}/{}/{}/{}".format(
area = "{:.2f}/{:.2f}/{:.2f}/{:.2f}".format(
cdm_lite_dataset[f"latitude|{coord_table}"].min().compute().item(),
cdm_lite_dataset[f"latitude|{coord_table}"].max().compute().item(),
cdm_lite_dataset[f"longitude|{coord_table}"].min().compute().item(),
Expand All @@ -75,13 +76,43 @@ def get_csv_header(
.itertuples(index=False, name=None)
)
varstr = "\n".join([f"# {v} [{u}]" for v, u in vars_and_units])
# Uncertainty documentation
uncertainty_vars = [
str(v) for v in cdm_lite_dataset.data_vars if "uncertainty_value" in str(v)
]
if len(uncertainty_vars) > 0:
unc_vars_and_names = [
(u, cdm_lite_dataset[u].long_name) for u in uncertainty_vars
]
uncertainty_str = "\n".join([f"# {u} {n}" for u, n in unc_vars_and_names])
else:
uncertainty_str = "No uncertainty columns available for this dataset."
# List of licences
licence_list_str = "\n".join(
f"# {licence}" for licence in cdm_lite_dataset.attrs["licence_list"]
)
# Render the header
header_params = dict(
dataset=retrieve_args.dataset,
dataset_source=retrieve_args.params.dataset_source,
area=area,
time_start=time_start,
time_end=time_end,
varstr=varstr,
uncertainty_str=uncertainty_str,
licence_list=licence_list_str,
)
header = template.format(**header_params)
return header


def to_zip(input_file_path: Path) -> Path:
    """Compress the given file into a .zip archive next to it.

    The archive is written alongside the input file, with the same stem and
    a ``.zip`` suffix (e.g. ``result.csv`` -> ``result.zip``). The archive
    contains a single entry named after the input file's basename.

    Parameters
    ----------
    input_file_path:
        Path of the file to compress. Must exist and be readable.

    Returns
    -------
    Path
        Path of the newly created zip archive.
    """
    # Determine output zip path (same directory, same stem, .zip suffix).
    output_zip_path = input_file_path.with_suffix(".zip")

    # Create zip archive. ZIP_DEFLATED is required: ZipFile's default is
    # ZIP_STORED (no compression), which would leave the archive as large
    # as the original file and defeat the purpose of zipping the CSV.
    with zipfile.ZipFile(
        output_zip_path, "w", compression=zipfile.ZIP_DEFLATED
    ) as zipf:
        zipf.write(input_file_path, arcname=input_file_path.name)

    return output_zip_path
6 changes: 4 additions & 2 deletions cads_adaptors/adaptors/cadsobs/retrieve.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import fsspec

from cads_adaptors import Context
from cads_adaptors.adaptors.cadsobs.csv import to_csv
from cads_adaptors.adaptors.cadsobs.csv import to_csv, to_zip
from cads_adaptors.adaptors.cadsobs.models import RetrieveArgs, RetrieveParams
from cads_adaptors.adaptors.cadsobs.utils import (
_add_attributes,
Expand Down Expand Up @@ -63,8 +63,10 @@ def retrieve_data(
else:
try:
with dask.config.set(scheduler="single-threaded"):
output_path = to_csv(output_dir, output_path_netcdf, retrieve_args)
output_path_csv = to_csv(output_dir, output_path_netcdf, retrieve_args)
output_path = to_zip(output_path_csv)
finally:
# Ensure that the netCDF is not left behind taking disk space.
output_path_netcdf.unlink()
output_path_csv.unlink()
return output_path
10 changes: 5 additions & 5 deletions tests/test_cadsobs_adaptor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import time
import zipfile
from pathlib import Path
from unittest.mock import Mock

Expand Down Expand Up @@ -198,6 +199,7 @@ def _send_request(self, endpoint, method, payload):
"rename": {"time_aggregation": "dataset_source", "variable": "variables"},
"force": {},
},
"licences": ["licence-to-use-copernicus-products", "uscrn-data-policy"],
}


Expand Down Expand Up @@ -229,11 +231,9 @@ def test_adaptor_csv(tmp_path, monkeypatch):
test_request_csv = TEST_REQUEST.copy()
test_request_csv["format"] = "csv"
result = adaptor.retrieve(test_request_csv)
tempfile = Path(tmp_path, "test_adaptor.csv")
with tempfile.open("wb") as tmpf:
tmpf.write(result.read())
assert tempfile.stat().st_size > 0
file_lines = tempfile.read_text().split("\n")
with zipfile.ZipFile(result, "r") as zipf:
file_lines = zipf.read(name=zipf.namelist()[0]).decode("UTF-8").split("\n")
assert len(file_lines) > 0
assert "# daily_maximum_air_temperature [K]" in file_lines
assert "# daily_maximum_relative_humidity [%]" in file_lines

Expand Down
Loading