diff --git a/CHANGES.md b/CHANGES.md index b956df0a5..d9b6a76eb 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -10,11 +10,16 @@ been changed if no `tile_size` is specified for the target grid mapping. It now defaults to the `tile_size` of the source grid mapping, improving the user-friendliness of resampling and reprojection. +* The `"https"` data store (`store = new_data_store("https", ...)`) now allows + for lazily accessing NetCDF files. + Implementation note: For this to work, the `DatasetNetcdfFsDataAccessor` + class has been adjusted. ### Fixes * The function `xcube.core.resample.resample_in_space()` now always operates - lazily and therefore supports chunk-wise, parallel processing. (#1079) + lazily and therefore supports chunk-wise, parallel processing. (#1079) + ## Changes in 1.7.1 diff --git a/test/core/store/fs/impl/test_https_netcdf.py b/test/core/store/fs/impl/test_https_netcdf.py new file mode 100644 index 000000000..45aa270b1 --- /dev/null +++ b/test/core/store/fs/impl/test_https_netcdf.py @@ -0,0 +1,39 @@ +# Copyright (c) 2018-2024 by xcube team and contributors +# Permissions are hereby granted under the terms of the MIT License: +# https://opensource.org/licenses/MIT. + +import unittest +from unittest.mock import patch + +import xarray as xr +import numpy as np + +from xcube.core.store import new_data_store + + +class HttpsNetcdfTest(unittest.TestCase): + """ + This class tests the access of a NetCDF file from a remote HTTPS server. 
+ """ + + @patch("xarray.open_dataset") + def test_open_netcdf_https(self, mock_open_dataset): + # set-up mock + mock_data = { + "temperature": (("time", "x", "y"), np.random.rand(5, 5, 5)), + "precipitation": (("time", "x", "y"), np.random.rand(5, 5, 5)), + } + mock_ds = xr.Dataset(mock_data) + mock_open_dataset.return_value = mock_ds + + fs_path = "mockfile.nc" + store = new_data_store("https", root="root.de") + ds = store.open_data(fs_path) + + mock_open_dataset.assert_called_once_with( + "https://root.de/mockfile.nc#mode=bytes", engine="netcdf4" + ) + self.assertTrue("temperature" in ds) + self.assertTrue("precipitation" in ds) + self.assertEqual(ds["temperature"].shape, (5, 5, 5)) + self.assertEqual(ds["precipitation"].shape, (5, 5, 5)) diff --git a/xcube/core/store/fs/impl/dataset.py b/xcube/core/store/fs/impl/dataset.py index 85b90eaf5..51a2bc194 100644 --- a/xcube/core/store/fs/impl/dataset.py +++ b/xcube/core/store/fs/impl/dataset.py @@ -22,6 +22,7 @@ from xcube.util.assertions import assert_instance from xcube.util.assertions import assert_true from xcube.util.fspath import is_local_fs +from xcube.util.fspath import is_https_fs from xcube.util.jsonencoder import to_json_value from xcube.util.jsonschema import JsonArraySchema from xcube.util.jsonschema import JsonBooleanSchema @@ -230,9 +231,10 @@ def open_data(self, data_id: str, **open_params) -> xr.Dataset: # with fs.open(data_id, 'rb') as file: # return xr.open_dataset(file, engine=engine, **open_params) - is_local = is_local_fs(fs) - if is_local: + if is_local_fs(fs): file_path = data_id + elif is_https_fs(fs): + file_path = f"{fs.protocol}://{data_id}#mode=bytes" else: _, file_path = new_temp_file(suffix=".nc") fs.get_file(data_id, file_path) diff --git a/xcube/util/fspath.py b/xcube/util/fspath.py index 9b0f44056..10dd480ae 100644 --- a/xcube/util/fspath.py +++ b/xcube/util/fspath.py @@ -3,11 +3,11 @@ # https://opensource.org/licenses/MIT. 
 import pathlib -from typing import Type from collections.abc import Iterator import fsspec from fsspec.implementations.local import LocalFileSystem +from fsspec.implementations.http import HTTPFileSystem def is_local_fs(fs: fsspec.AbstractFileSystem) -> bool: @@ -15,6 +15,11 @@ def is_local_fs(fs: fsspec.AbstractFileSystem) -> bool: return "file" in fs.protocol or isinstance(fs, LocalFileSystem) +def is_https_fs(fs: fsspec.AbstractFileSystem) -> bool: + """Check whether *fs* is an HTTP(S) filesystem.""" + return "https" in fs.protocol or isinstance(fs, HTTPFileSystem) + + def get_fs_path_class(fs: fsspec.AbstractFileSystem) -> type[pathlib.PurePath]: """Get the appropriate ``pathlib.PurePath`` class for the filesystem *fs*.""" if is_local_fs(fs):