From 20944af126d4ffdaf87b50690779cd844def2bd3 Mon Sep 17 00:00:00 2001 From: konstntokas Date: Tue, 5 Nov 2024 10:00:25 +0100 Subject: [PATCH 1/4] allow to open netcdf file from remote https fs --- test/core/store/fs/impl/test_https_netcdf.py | 43 ++++++++++++++++++++ xcube/core/store/fs/impl/dataset.py | 6 ++- xcube/util/fspath.py | 7 +++- 3 files changed, 53 insertions(+), 3 deletions(-) create mode 100644 test/core/store/fs/impl/test_https_netcdf.py diff --git a/test/core/store/fs/impl/test_https_netcdf.py b/test/core/store/fs/impl/test_https_netcdf.py new file mode 100644 index 000000000..e06830ec1 --- /dev/null +++ b/test/core/store/fs/impl/test_https_netcdf.py @@ -0,0 +1,43 @@ +# Copyright (c) 2018-2024 by xcube team and contributors +# Permissions are hereby granted under the terms of the MIT License: +# https://opensource.org/licenses/MIT. + +import os.path +import unittest + +import fsspec +import rasterio as rio +import rioxarray +import s3fs +import xarray +import xarray as xr + +from xcube.core.store import new_data_store + + +class HttpsNetcdfTest(unittest.TestCase): + """ + This class tests a lazy access of a NetCDF file from a remote HTTPS server. 
+ """ + + def test_open_netcdf_https(self): + """This test loads GAMIv2-0_2010-2020_100m.nc (217GB), which is available via + https://datapub.gfz-potsdam.de/download/10.5880.GFZ.1.4.2023.006-VEnuo/ + """ + fs_path = "download/10.5880.GFZ.1.4.2023.006-VEnuo/GAMIv2-0_2010-2020_100m.nc" + store = new_data_store("https", root="datapub.gfz-potsdam.de") + ds = store.open_data(fs_path, chunks={}) + self.assertIsInstance(ds, xr.Dataset) + self.assertEqual( + {"members": 20, "latitude": 202500, "longitude": 405000, "time": 2}, + ds.sizes, + ) + self.assertEqual( + [1, 7789, 15577, 1], + [ + ds.chunksizes["members"][0], + ds.chunksizes["latitude"][0], + ds.chunksizes["longitude"][0], + ds.chunksizes["time"][0], + ], + ) diff --git a/xcube/core/store/fs/impl/dataset.py b/xcube/core/store/fs/impl/dataset.py index 85b90eaf5..51a2bc194 100644 --- a/xcube/core/store/fs/impl/dataset.py +++ b/xcube/core/store/fs/impl/dataset.py @@ -22,6 +22,7 @@ from xcube.util.assertions import assert_instance from xcube.util.assertions import assert_true from xcube.util.fspath import is_local_fs +from xcube.util.fspath import is_https_fs from xcube.util.jsonencoder import to_json_value from xcube.util.jsonschema import JsonArraySchema from xcube.util.jsonschema import JsonBooleanSchema @@ -230,9 +231,10 @@ def open_data(self, data_id: str, **open_params) -> xr.Dataset: # with fs.open(data_id, 'rb') as file: # return xr.open_dataset(file, engine=engine, **open_params) - is_local = is_local_fs(fs) - if is_local: + if is_local_fs(fs): file_path = data_id + elif is_https_fs(fs): + file_path = f"{fs.protocol}://{data_id}#mode=bytes" else: _, file_path = new_temp_file(suffix=".nc") fs.get_file(data_id, file_path) diff --git a/xcube/util/fspath.py b/xcube/util/fspath.py index 9b0f44056..10dd480ae 100644 --- a/xcube/util/fspath.py +++ b/xcube/util/fspath.py @@ -3,11 +3,11 @@ # https://opensource.org/licenses/MIT. 
import pathlib -from typing import Type from collections.abc import Iterator import fsspec from fsspec.implementations.local import LocalFileSystem +from fsspec.implementations.http import HTTPFileSystem def is_local_fs(fs: fsspec.AbstractFileSystem) -> bool: @@ -15,6 +15,11 @@ def is_local_fs(fs: fsspec.AbstractFileSystem) -> bool: return "file" in fs.protocol or isinstance(fs, LocalFileSystem) +def is_https_fs(fs: fsspec.AbstractFileSystem) -> bool: + """Check whether *fs* is an HTTP(S) filesystem.""" + return "https" in fs.protocol or isinstance(fs, HTTPFileSystem) + + def get_fs_path_class(fs: fsspec.AbstractFileSystem) -> type[pathlib.PurePath]: """Get the appropriate ``pathlib.PurePath`` class for the filesystem *fs*.""" if is_local_fs(fs): From e908116a9f04ea3698e5a9a3b1b639d841502154 Mon Sep 17 00:00:00 2001 From: konstntokas Date: Tue, 5 Nov 2024 10:32:50 +0100 Subject: [PATCH 2/4] update CHANGES.log --- CHANGES.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 6be207b96..d40462b27 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,9 @@ ## Changes in 1.7.2 (in development) +### Enhancements + +* The `DatasetNetcdfFsDataAccessor` class has been adjusted, so that NetCDF files can + be now opened from a remote HTTPS server using the `"https"` data store. ## Changes in 1.7.1 From e9c86ab93bedaa31dff7b2226f217bc86662bc3f Mon Sep 17 00:00:00 2001 From: konstntokas Date: Tue, 5 Nov 2024 14:48:47 +0100 Subject: [PATCH 3/4] unittest as mock --- test/core/store/fs/impl/test_https_netcdf.py | 50 +++++++++----------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/test/core/store/fs/impl/test_https_netcdf.py b/test/core/store/fs/impl/test_https_netcdf.py index e06830ec1..45aa270b1 100644 --- a/test/core/store/fs/impl/test_https_netcdf.py +++ b/test/core/store/fs/impl/test_https_netcdf.py @@ -2,42 +2,38 @@ # Permissions are hereby granted under the terms of the MIT License: # https://opensource.org/licenses/MIT. 
-import os.path import unittest +from unittest.mock import patch -import fsspec -import rasterio as rio -import rioxarray -import s3fs -import xarray import xarray as xr +import numpy as np from xcube.core.store import new_data_store class HttpsNetcdfTest(unittest.TestCase): """ - This class tests a lazy access of a NetCDF file from a remote HTTPS server. + This class tests the access of a NetCDF file from a remote HTTPS server. """ - def test_open_netcdf_https(self): - """This test loads GAMIv2-0_2010-2020_100m.nc (217GB), which is available via - https://datapub.gfz-potsdam.de/download/10.5880.GFZ.1.4.2023.006-VEnuo/ - """ - fs_path = "download/10.5880.GFZ.1.4.2023.006-VEnuo/GAMIv2-0_2010-2020_100m.nc" - store = new_data_store("https", root="datapub.gfz-potsdam.de") - ds = store.open_data(fs_path, chunks={}) - self.assertIsInstance(ds, xr.Dataset) - self.assertEqual( - {"members": 20, "latitude": 202500, "longitude": 405000, "time": 2}, - ds.sizes, - ) - self.assertEqual( - [1, 7789, 15577, 1], - [ - ds.chunksizes["members"][0], - ds.chunksizes["latitude"][0], - ds.chunksizes["longitude"][0], - ds.chunksizes["time"][0], - ], + @patch("xarray.open_dataset") + def test_open_netcdf_https(self, mock_open_dataset): + # set-up mock + mock_data = { + "temperature": (("time", "x", "y"), np.random.rand(5, 5, 5)), + "precipitation": (("time", "x", "y"), np.random.rand(5, 5, 5)), + } + mock_ds = xr.Dataset(mock_data) + mock_open_dataset.return_value = mock_ds + + fs_path = "mockfile.nc" + store = new_data_store("https", root="root.de") + ds = store.open_data(fs_path) + + mock_open_dataset.assert_called_once_with( + "https://root.de/mockfile.nc#mode=bytes", engine="netcdf4" ) + self.assertTrue("temperature" in ds) + self.assertTrue("precipitation" in ds) + self.assertEqual(ds["temperature"].shape, (5, 5, 5)) + self.assertEqual(ds["precipitation"].shape, (5, 5, 5)) From 358d6c93eafc050b4bd69a841a319afb722be48f Mon Sep 17 00:00:00 2001 From: Konstantin Ntokas 
<38956538+konstntokas@users.noreply.github.com> Date: Tue, 5 Nov 2024 15:23:41 +0100 Subject: [PATCH 4/4] Update CHANGES.md Co-authored-by: Norman Fomferra --- CHANGES.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index d40462b27..9c2868ecf 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,8 +2,10 @@ ### Enhancements -* The `DatasetNetcdfFsDataAccessor` class has been adjusted, so that NetCDF files can - be now opened from a remote HTTPS server using the `"https"` data store. +* The `"https"` data store (`store = new_data_store("https", ...)`) now allows + for lazily accessing NetCDF files. + Implementation note: For this to work, the `DatasetNetcdfFsDataAccessor` + class has been adjusted. ## Changes in 1.7.1