Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ Introducing RioXarrayReaderIterDataPipe for reading GeoTIFFs #6

Merged
merged 3 commits into from
Jun 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,6 @@ jobs:
poetry plugin add poetry-dynamic-versioning-plugin
poetry show

# Run the regular tests
# Run the unit tests and doctests
- name: Test with pytest
run: poetry run --verbose pytest
run: poetry run --verbose pytest --doctest-modules
2 changes: 2 additions & 0 deletions docs/_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,5 @@ sphinx:
html_show_copyright: false
extra_extensions:
- 'sphinx.ext.autodoc'
- 'sphinx.ext.napoleon'
- 'sphinx.ext.viewcode'
4 changes: 3 additions & 1 deletion docs/api.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# API Reference

## DataPipes

```{eval-rst}
.. automodule:: zen3geo
.. automodule:: zen3geo.datapipes
:members:
```
319 changes: 318 additions & 1 deletion poetry.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@ classifiers = [
"Programming Language :: Python :: 3.10",
]


[tool.poetry.dependencies]
python = "^3.8"
rioxarray = ">=0.10.0"
torchdata = ">=0.3.0"

[tool.poetry.group.dev.dependencies]
black = "*"
Expand Down
2 changes: 2 additions & 0 deletions zen3geo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@

# Standard-library helper used below to look up the package version.
from importlib.metadata import version

# Re-export the raster reader DataPipe under the shorter alias RioXarrayReader.
from zen3geo.datapipes import RioXarrayReaderIterDataPipe as RioXarrayReader

# Version string looked up for the "zen3geo" distribution via importlib.metadata.
__version__ = version("zen3geo") # e.g. 0.1.2.dev3+g0ab3cd78
83 changes: 83 additions & 0 deletions zen3geo/datapipes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""
Iterable-style DataPipes for geospatial raster and vector data.

Based on
https://github.com/pytorch/data/blob/v0.3.0/torchdata/datapipes/iter/load/online.py#L29-L59
"""
from typing import Any, Dict, Iterator, Optional, Tuple

import rioxarray
from torchdata.datapipes import functional_datapipe
from torchdata.datapipes.iter import IterDataPipe
from torchdata.datapipes.utils import StreamWrapper


@functional_datapipe("read_from_rioxarray")
class RioXarrayReaderIterDataPipe(IterDataPipe[Tuple[str, StreamWrapper]]):
    """
    Takes raster files (e.g. GeoTIFFs) from local disk or URLs
    (as long as they can be read by rioxarray and/or rasterio)
    and yields tuples of filename and xarray.DataArray objects
    (functional name: ``read_from_rioxarray``).

    Parameters
    ----------
    source_datapipe : IterDataPipe[str]
        A DataPipe that contains filepaths or URL links to raster files such as
        GeoTIFFs.

    kwargs : Optional
        Extra keyword arguments to pass to ``rioxarray.open_rasterio`` and/or
        ``rasterio.open``. See
        https://corteva.github.io/rioxarray/stable/rioxarray.html#rioxarray-open-rasterio
        and https://rasterio.readthedocs.io/en/stable/api/rasterio.html#rasterio.open

    Yields
    ------
    stream_obj : Tuple[str, StreamWrapper]
        A tuple consisting of the filename that was passed in, and the
        ``xarray.DataArray`` object containing the raster data, wrapped in a
        ``StreamWrapper``.

    Example
    -------
    >>> from torchdata.datapipes.iter import IterableWrapper
    >>> from zen3geo import RioXarrayReader
    ...
    >>> # Read in GeoTIFF data using DataPipe
    >>> file_url: str = "https://github.com/GenericMappingTools/gmtserver-admin/raw/master/cache/earth_day_HD.tif"
    >>> dp = IterableWrapper(iterable=[file_url])
    >>> dp_rioxarray = dp.read_from_rioxarray()
    ...
    >>> # Loop or iterate over the DataPipe stream
    >>> it = iter(dp_rioxarray)
    >>> filename, dataarray = next(it)
    >>> filename
    'https://github.com/GenericMappingTools/gmtserver-admin/raw/master/cache/earth_day_HD.tif'
    >>> dataarray
    StreamWrapper<<xarray.DataArray (band: 1, y: 960, x: 1920)>
    [1843200 values with dtype=uint8]
    Coordinates:
      * band         (band) int64 1
      * x            (x) float64 -179.9 -179.7 -179.5 -179.3 ... 179.5 179.7 179.9
      * y            (y) float64 89.91 89.72 89.53 89.34 ... -89.53 -89.72 -89.91
        spatial_ref  int64 0
    ...
    """

    def __init__(self, source_datapipe: IterDataPipe[str], **kwargs: Any) -> None:
        # NOTE: the annotation on ``**kwargs`` describes each individual
        # keyword value, not the collected dictionary, hence ``Any``.
        self.source_datapipe: IterDataPipe[str] = source_datapipe
        # Stored as-is and forwarded to ``rioxarray.open_rasterio`` per file.
        self.kwargs: Dict[str, Any] = kwargs

    def __iter__(self) -> Iterator[Tuple[str, StreamWrapper]]:
        """
        Open each file from the source DataPipe with
        ``rioxarray.open_rasterio`` and yield it alongside its filename.
        """
        for filename in self.source_datapipe:
            yield (
                filename,
                StreamWrapper(
                    rioxarray.open_rasterio(filename=filename, **self.kwargs)
                ),
            )

    def __len__(self) -> int:
        """
        Return the length of the source DataPipe (one output per input file).
        """
        return len(self.source_datapipe)
27 changes: 27 additions & 0 deletions zen3geo/tests/test_datapipes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""
Tests for datapipes.
"""
from torchdata.datapipes.iter import IterableWrapper

from zen3geo import RioXarrayReader


# %%
def test_rioxarray_reader():
    """
    Ensure that RioXarrayReader works to read in a GeoTIFF file, both when
    built with the class constructor and when built with the functional form.
    """
    file_url: str = "https://github.com/GenericMappingTools/gmtserver-admin/raw/master/cache/earth_day_HD.tif"
    dp = IterableWrapper(iterable=[file_url])

    # Keep both DataPipes so that each construction style is actually
    # iterated and checked, instead of one silently overwriting the other.
    datapipes = {
        "class constructor": RioXarrayReader(source_datapipe=dp),
        "functional form (recommended)": dp.read_from_rioxarray(),
    }

    for form, dp_rioxarray in datapipes.items():
        it = iter(dp_rioxarray)
        filename, dataarray = next(it)

        # The label is passed as the assertion message to show which form failed.
        assert isinstance(filename, str), form
        assert filename == file_url, form
        assert dataarray.shape == (1, 960, 1920), form
        assert dataarray.dims == ("band", "y", "x"), form