|
| 1 | +# Copyright (c) Microsoft Corporation. All rights reserved. |
| 2 | +# Licensed under the MIT License. |
| 3 | + |
| 4 | +"""Northeastern China Crop Map Dataset.""" |
| 5 | + |
| 6 | +import glob |
| 7 | +import os |
| 8 | +from collections.abc import Iterable |
| 9 | +import pathlib |
| 10 | +from typing import Any, Callable, Optional, Union |
| 11 | + |
| 12 | +import matplotlib.pyplot as plt |
| 13 | +import torch |
| 14 | +from matplotlib.figure import Figure |
| 15 | +from rasterio.crs import CRS |
| 16 | + |
| 17 | +from .geo import RasterDataset |
| 18 | +from .utils import BoundingBox, DatasetNotFoundError, download_url, extract_archive |
| 19 | + |
| 20 | + |
class NCCM(RasterDataset):
    """The Northeastern China Crop Map Dataset.

    Link: https://www.nature.com/articles/s41597-021-00827-9

    This dataset produced annual 10-m crop maps of the
    major crops (maize, soybean, and rice)
    in Northeast China from 2017 to 2019, using hierarchical mapping strategies,
    random forest classifiers, interpolated and
    smoothed 10-day Sentinel-2 time series data and
    optimized features from spectral, temporal and
    textural characteristics of the land surface.
    The resultant maps have high overall accuracies (OA)
    based on ground truth data. The dataset contains information
    specific to three years: 2017, 2018, 2019.

    The dataset contains 5 classes:

    0. paddy rice
    1. maize
    2. soybean
    3. other crops and lands
    4. nodata

    Dataset format:

    * Three .TIF files containing the labels
    * JavaScript code to download images from the dataset.

    If you use this dataset in your research, please cite the following paper:

    * https://doi.org/10.1038/s41597-021-00827-9

    .. versionadded:: 0.6
    """

    # Label files are named like ``CDL2017_clip.tif``; the 4-digit year in the
    # filename is the sample timestamp (see ``date_format`` below).
    filename_regex = r"CDL(?P<year>\d{4})_clip"
    filename_glob = "CDL*.*"
    # Single figshare archive containing all three annual label rasters.
    zipfile_glob = "13090442.zip"

    date_format = "%Y"
    is_image = False
    url = "https://figshare.com/ndownloader/articles/13090442/versions/1"
    md5 = "eae952f1b346d7e649d027e8139a76f5"

    # Raw raster value -> RGBA color. The raw files use 15 as the nodata
    # value; ``__init__`` remaps it (like every other key) to a dense ordinal
    # index, so 15 becomes ordinal class 4 ("nodata").
    cmap = {
        0: (0, 255, 0, 255),
        1: (255, 0, 0, 255),
        2: (255, 255, 0, 255),
        3: (128, 128, 128, 255),
        15: (255, 255, 255, 255),
    }

    def __init__(
        self,
        paths: Union[pathlib.Path, str, Iterable[Union[pathlib.Path, str]]] = "data",
        crs: Optional[CRS] = None,
        res: Optional[float] = None,
        transforms: Optional[Callable[[dict[str, Any]], dict[str, Any]]] = None,
        cache: bool = True,
        download: bool = False,
        checksum: bool = False,
    ) -> None:
        """Initialize a new dataset.

        Args:
            paths: one or more root directories to search or files to load
            crs: :term:`coordinate reference system (CRS)` to warp to
                (defaults to the CRS of the first file found)
            res: resolution of the dataset in units of CRS
                (defaults to the resolution of the first file found)
            transforms: a function/transform that takes an input sample
                and returns a transformed version
            cache: if True, cache file handle to speed up repeated sampling
            download: if True, download dataset and store it in the root directory
            checksum: if True, check the MD5 after downloading files (may be slow)

        Raises:
            DatasetNotFoundError: If dataset is not found and *download* is False.
        """
        self.paths = paths
        self.download = download
        self.checksum = checksum
        # Lookup table mapping raw raster values to dense ordinal classes.
        # Unlisted values default to 4 ("nodata"); listed keys are filled in
        # after ``super().__init__`` below.
        self.ordinal_map = torch.full((max(self.cmap.keys()) + 1,), 4, dtype=self.dtype)
        # One RGBA row per ordinal class, used by :meth:`plot`.
        self.ordinal_cmap = torch.zeros((5, 4), dtype=torch.uint8)

        self._verify()
        super().__init__(paths, crs, res, transforms=transforms, cache=cache)

        # Enumerate cmap keys in insertion order so raw value k maps to
        # ordinal index i, and ordinal index i maps to color cmap[k].
        for i, (k, v) in enumerate(self.cmap.items()):
            self.ordinal_map[k] = i
            self.ordinal_cmap[i] = torch.tensor(v)

    def __getitem__(self, query: BoundingBox) -> dict[str, Any]:
        """Retrieve mask and metadata indexed by query.

        Args:
            query: (minx, maxx, miny, maxy, mint, maxt) coordinates to index

        Returns:
            sample of mask and metadata at that index

        Raises:
            IndexError: if query is not found in the index
        """
        sample = super().__getitem__(query)
        # Convert raw raster values (0-3, 15) to dense ordinal classes (0-4).
        sample["mask"] = self.ordinal_map[sample["mask"]]
        return sample

    def _verify(self) -> None:
        """Verify the integrity of the dataset.

        Raises:
            DatasetNotFoundError: If dataset is not found and *download* is False.
        """
        # Check if the extracted files already exist
        if self.files:
            return

        # Check if the zip file has already been downloaded
        assert isinstance(self.paths, (pathlib.Path, str))
        pathname = os.path.join(self.paths, "**", self.zipfile_glob)
        if glob.glob(pathname, recursive=True):
            self._extract()
            return

        # Check if the user requested to download the dataset
        if not self.download:
            raise DatasetNotFoundError(self)

        # Download the dataset
        self._download()
        self._extract()

    def _download(self) -> None:
        """Download the dataset."""
        # Reuse the class attribute instead of repeating the literal filename,
        # so the download target and the glob used by _verify/_extract agree.
        download_url(
            self.url, self.paths, self.zipfile_glob, md5=self.md5 if self.checksum else None
        )

    def _extract(self) -> None:
        """Extract the dataset."""
        assert isinstance(self.paths, (pathlib.Path, str))
        pathname = os.path.join(self.paths, "**", self.zipfile_glob)
        extract_archive(glob.glob(pathname, recursive=True)[0], self.paths)

    def plot(
        self,
        sample: dict[str, Any],
        show_titles: bool = True,
        suptitle: Optional[str] = None,
    ) -> Figure:
        """Plot a sample from the dataset.

        Args:
            sample: a sample returned by :meth:`NCCM.__getitem__`
            show_titles: flag indicating whether to show titles above each panel
            suptitle: optional string to use as a suptitle

        Returns:
            a matplotlib Figure with the rendered sample
        """
        mask = sample["mask"].squeeze()
        ncols = 1

        showing_predictions = "prediction" in sample
        if showing_predictions:
            pred = sample["prediction"].squeeze()
            ncols = 2

        fig, axs = plt.subplots(
            nrows=1, ncols=ncols, figsize=(ncols * 4, 4), squeeze=False
        )

        # Index the RGBA palette with the ordinal mask to build an RGB image.
        axs[0, 0].imshow(self.ordinal_cmap[mask], interpolation="none")
        axs[0, 0].axis("off")

        if show_titles:
            axs[0, 0].set_title("Mask")

        if showing_predictions:
            axs[0, 1].imshow(self.ordinal_cmap[pred], interpolation="none")
            axs[0, 1].axis("off")
            if show_titles:
                axs[0, 1].set_title("Prediction")

        if suptitle is not None:
            plt.suptitle(suptitle)

        return fig
0 commit comments