-
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
✨ Introducing RioXarrayReaderIterDataPipe for reading GeoTIFFs (#6)
An iterable-style DataPipe for GeoTIFF data! Composition over inheritance~ I/O handled using rioxarray. IterDataPipe code based on https://github.com/pytorch/data/blob/v0.3.0/torchdata/datapipes/iter/load/online.py#L29-L59. * ➕ Add torchdata A library of common modular data loading primitives for easily constructing flexible and performant data pipelines! * ➕ Add rioxarray Geospatial xarray extension powered by rasterio! Pinning minimum version to 0.10.0 which had dropped Python 3.7 support as per NEP29. * ✨ Introducing RioXarrayReaderIterDataPipe for reading GeoTIFFs An iterable-style DataPipe for GeoTIFF data! Composition over inheritance. Uses rioxarray for the I/O and the IterDataPipe code is based on https://github.com/pytorch/data/blob/v0.3.0/torchdata/datapipes/iter/load/online.py#L29-L59. Have added a unit test and doctest for good measure.
- Loading branch information
Showing
8 changed files
with
439 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
# API Reference | ||
|
||
## DataPipes | ||
|
||
```{eval-rst} | ||
.. automodule:: zen3geo | ||
.. automodule:: zen3geo.datapipes | ||
:members: | ||
``` |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
""" | ||
Iterable-style DataPipes for geospatial raster and vector data. | ||
Based on | ||
https://github.com/pytorch/data/blob/v0.3.0/torchdata/datapipes/iter/load/online.py#L29-L59 | ||
""" | ||
from typing import Any, Dict, Iterator, Optional, Tuple | ||
|
||
import rioxarray | ||
from torchdata.datapipes import functional_datapipe | ||
from torchdata.datapipes.iter import IterDataPipe | ||
from torchdata.datapipes.utils import StreamWrapper | ||
|
||
|
||
@functional_datapipe("read_from_rioxarray")
class RioXarrayReaderIterDataPipe(IterDataPipe[Tuple[str, StreamWrapper]]):
    """
    Takes raster files (e.g. GeoTIFFs) from local disk or URLs
    (as long as they can be read by rioxarray and/or rasterio)
    and yields tuples of filename and xarray.DataArray objects
    (functional name: ``read_from_rioxarray``).

    Parameters
    ----------
    source_datapipe : IterDataPipe[str]
        A DataPipe that contains filepaths or URL links to raster files such as
        GeoTIFFs.

    kwargs : Optional
        Extra keyword arguments to pass to ``rioxarray.open_rasterio`` and/or
        ``rasterio.open``. See
        https://corteva.github.io/rioxarray/stable/rioxarray.html#rioxarray-open-rasterio
        and https://rasterio.readthedocs.io/en/stable/api/rasterio.html#rasterio.open

    Yields
    ------
    stream_obj : Tuple[str, StreamWrapper]
        A tuple consisting of the filename that was passed in, and the
        ``xarray.DataArray`` object containing the raster data, wrapped in a
        ``StreamWrapper``.

    Example
    -------
    >>> from torchdata.datapipes.iter import IterableWrapper
    >>> from zen3geo import RioXarrayReader
    ...
    >>> # Read in GeoTIFF data using DataPipe
    >>> file_url: str = "https://github.com/GenericMappingTools/gmtserver-admin/raw/master/cache/earth_day_HD.tif"
    >>> dp = IterableWrapper(iterable=[file_url])
    >>> dp_rioxarray = dp.read_from_rioxarray()
    ...
    >>> # Loop or iterate over the DataPipe stream
    >>> it = iter(dp_rioxarray)
    >>> filename, dataarray = next(it)
    >>> filename
    'https://github.com/GenericMappingTools/gmtserver-admin/raw/master/cache/earth_day_HD.tif'
    >>> dataarray
    StreamWrapper<<xarray.DataArray (band: 1, y: 960, x: 1920)>
    [1843200 values with dtype=uint8]
    Coordinates:
      * band         (band) int64 1
      * x            (x) float64 -179.9 -179.7 -179.5 -179.3 ... 179.5 179.7 179.9
      * y            (y) float64 89.91 89.72 89.53 89.34 ... -89.53 -89.72 -89.91
        spatial_ref  int64 0
    ...
    """

    def __init__(
        self, source_datapipe: IterDataPipe[str], **kwargs: Any
    ) -> None:
        """
        Store the upstream datapipe and the keyword arguments that will be
        forwarded to ``rioxarray.open_rasterio`` for every file.
        """
        self.source_datapipe: IterDataPipe[str] = source_datapipe
        # The annotation on ``**kwargs`` describes each value, so the
        # collected mapping is a plain ``Dict[str, Any]``.
        self.kwargs: Dict[str, Any] = kwargs

    def __iter__(self) -> Iterator[Tuple[str, StreamWrapper]]:
        """
        Open each filename from the source datapipe with rioxarray and yield
        a ``(filename, StreamWrapper(dataarray))`` tuple.
        """
        for filename in self.source_datapipe:
            yield (
                filename,
                StreamWrapper(
                    rioxarray.open_rasterio(filename=filename, **self.kwargs)
                ),
            )

    def __len__(self) -> int:
        """
        Return the length of the source datapipe; one item is yielded per
        input filename.
        """
        return len(self.source_datapipe)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
""" | ||
Tests for datapipes. | ||
""" | ||
from torchdata.datapipes.iter import IterableWrapper | ||
|
||
from zen3geo import RioXarrayReader | ||
|
||
|
||
# %% | ||
def test_rioxarray_reader():
    """
    Ensure that RioXarrayReader works to read in a GeoTIFF file, both when
    built via the class constructor and via the functional form.
    """
    file_url: str = "https://github.com/GenericMappingTools/gmtserver-admin/raw/master/cache/earth_day_HD.tif"
    dp = IterableWrapper(iterable=[file_url])

    # Using class constructors
    dp_rioxarray_class = RioXarrayReader(source_datapipe=dp)
    # Using functional form (recommended)
    dp_rioxarray_func = dp.read_from_rioxarray()

    # Check both construction paths; previously the class-constructed pipe
    # was overwritten before anything was asserted on it.
    for dp_rioxarray in (dp_rioxarray_class, dp_rioxarray_func):
        it = iter(dp_rioxarray)
        filename, dataarray = next(it)

        assert isinstance(filename, str)
        # The filename is passed through unchanged by the datapipe.
        assert filename == file_url
        assert dataarray.shape == (1, 960, 1920)
        assert dataarray.dims == ("band", "y", "x")