Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Generator Class #632

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/generator.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Pixels Data Generator
A [DataGenerator](https://github.com/tesselo/pixels/blob/main/pixels/generator/generator.py) is used to feed pixels data to our AI models. This generator transforms the raw imagery stacks from the pixels collection into a format that can be passed to an AI model.
A [Generator](https://github.com/tesselo/pixels/blob/main/pixels/generator/generator.py) is used to feed pixels data to our AI models. This generator transforms the raw imagery stacks from the pixels collection into a format that can be passed to an AI model.

For super-resolution models, the generator is able to change the resolution of the imagery data.

@@ -195,7 +195,7 @@ Path to for the downloaded data directory. If `download_dir` is not set and
## Supporting arguments
This section documents a number of arguments that can be set as generator
arguments in the stac training algorithm, that are not direct arguments of the
DataGenerator. These additional arguments are related with formatting the input
Generator. These additional arguments are related to formatting the input
and output of the models, as well as handling special cases during prediction.

### Variable prediction image size
8 changes: 4 additions & 4 deletions docs/how-to-guides/running_the_generator.md
Original file line number Diff line number Diff line change
@@ -7,13 +7,13 @@ The arguments definitions can be seen here:[Pixels Data Generator](../generator.
When running the generator locally multiple times, it can make sense to download the data so that subsequent runs are faster. To do so, use the following additional parameters: ```download_data``` and ```download_dir```.

```python
from pixels.stac_generator.generator import DataGenerator
from pixels.stac_generator.generator import Generator

# Path to Collection dictionary.
# It can be on S3 or local, but it must be a catalogs_dict.json representing the collection.
path_collection_catalog = 's3://bucket-key/pixelsdata/collection_id_key/data/catalogs_dict.json'

data_training_generator = DataGenerator(
data_training_generator = Generator(
path_collection_catalog=path_collection_catalog,
random_seed = 23,
split=0.8,
@@ -38,7 +38,7 @@ Considering a dataset with 1000 samples.

```python
# This creates a generator with the remaining 20% that were not used in the training.
>>> data_evaluation_generator = DataGenerator(
>>> data_evaluation_generator = Generator(
... # Same as in training
split=0.2,
usage_type="evaluation",
@@ -52,7 +52,7 @@ Considering a dataset with 1000 samples.
```python
# This creates a generator with 10% of the full dataset, only fetching samples not used in training.

>>> data_evaluation_generator = DataGenerator(
>>> data_evaluation_generator = Generator(
... # Same as in training
split=0.1,
usage_type="evaluation",
32,102 changes: 32,102 additions & 0 deletions get-pip.py

Large diffs are not rendered by default.

56 changes: 27 additions & 29 deletions pixels/generator/augmentation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from typing import Union

import numpy as np

from pixels.generator.types import XShape2D, XShape3D, YShapeND

AUGMENTATION_FACTOR = 4


@@ -85,7 +89,7 @@ def change_bright(image, ran=1):
return np.array(image * ran)


def apply_augmentation_to_image(img, augmentation_index):
def augment_image(img, augmentation_index):
if augmentation_index is None or augmentation_index == 1:
return img_flip(img)
if augmentation_index is None or augmentation_index == 2:
@@ -98,13 +102,13 @@ def apply_augmentation_to_image(img, augmentation_index):
return change_bright(img, ran=10)


def apply_augmentation_to_stack(imgs, augmentation_index):
def augment_stack(imgs, augmentation_index):
# Do the augmentations on images (number_occurrences, bands, height, width).
time_aug_imgs = []
for number_occurrences in imgs:
aug_img = []
for bands in number_occurrences:
aug_img.append(apply_augmentation_to_image(bands, augmentation_index))
aug_img.append(augment_image(bands, augmentation_index))
time_aug_imgs.append(aug_img)
# Revert shapes back to (number_occurrences, height, width, bands)
time_aug_imgs = np.array(time_aug_imgs)
@@ -124,31 +128,29 @@ def apply_augmentation_to_stack(imgs, augmentation_index):
def augmentation(
X,
Y,
sizeX_height=None,
sizeX_width=None,
sizeY_height=None,
sizeY_width=None,
x_shape: Union[XShape2D, XShape3D],
y_shape: YShapeND,
augmentation_index=None,
):
# To make the augmentations in a standard mode we need to
# get the tensors on the same shape, and the same number of dimensions.
data_X = set_standard_shape(X, sizex=sizeX_height, sizey=sizeX_width)
data_Y = set_standard_shape(Y, sizex=sizeY_height, sizey=sizeY_width)
data_X = set_standard_shape(X, sizex=x_shape.height, sizey=x_shape.width)
data_Y = set_standard_shape(Y, sizex=y_shape.height, sizey=y_shape.width)
data_Y = np.squeeze(data_Y)
data_X = np.squeeze(data_X)
if len(data_X.shape) < 4:
data_X = np.expand_dims(data_X, list(np.arange(4 - len(data_X.shape))))
if len(data_Y.shape) < 4:
data_Y = np.expand_dims(data_Y, list(np.arange(4 - len(data_Y.shape))))
resulted_augmentation_X = [X[0]]
resulted_augmentation_Y = [Y]
augmentation_X = [X[0]]
augmentation_Y = [Y]
for i in augmentation_index:
resulted_augmentation_X.append(apply_augmentation_to_stack(data_X, i))
resulted_augmentation_Y.append(apply_augmentation_to_stack(data_Y, i))
resulted_augmentation_Y = np.squeeze(resulted_augmentation_Y)
augmentation_X.append(augment_stack(data_X, i))
augmentation_Y.append(augment_stack(data_Y, i))
augmentation_Y = np.squeeze(augmentation_Y)
return (
resulted_augmentation_X,
resulted_augmentation_Y,
augmentation_X,
augmentation_Y,
)
# Flip
# Add noise
@@ -158,13 +160,9 @@ def augmentation(
def do_augmentation_on_batch(
X,
Y,
sizeX_height,
sizeX_width,
sizeY_height,
sizeY_width,
x_shape: Union[XShape2D, XShape3D],
y_shape: YShapeND,
augmentation_index=1,
batch_size=1,
mode="3D_Model",
):
"""
Define how many augmentations to do, and build the correct input for the augmentation function
@@ -175,6 +173,8 @@ def do_augmentation_on_batch(
Set of collected images.
Y : numpy array
Goal image in training.
x_shape : XShape2D or XShape3D
y_shape : YShapeND
augmentation_index : int or list
Set the number of augmentations. If it is a list, does the augmentations
with the keys in the list; if it is an int, does all the keys up to that number.
@@ -192,16 +192,14 @@ def do_augmentation_on_batch(
augmentation_index = np.arange(augmentation_index) + 1
batch_X = np.array([])
batch_Y = np.array([])
if mode == "2D_Model":
if len(x_shape) < 5:
X = np.expand_dims(X, 1)
for batch in range(batch_size):
for batch in range(x_shape.batch):
aug_X, aug_Y = augmentation(
X[batch : batch + 1],
Y[batch : batch + 1],
sizeX_height=sizeX_height,
sizeX_width=sizeX_width,
sizeY_height=sizeY_height,
sizeY_width=sizeY_width,
x_shape,
y_shape,
augmentation_index=augmentation_index,
)
if not batch_X.any():
@@ -210,7 +208,7 @@ def do_augmentation_on_batch(
else:
batch_X = np.concatenate([batch_X, aug_X])
batch_Y = np.concatenate([batch_Y, aug_Y])
if mode == "2D_Model":
if len(x_shape) < 5:
batch_X = np.vstack(batch_X)
if len(batch_Y.shape) < 4:
batch_Y = np.expand_dims(batch_Y, -1)
4 changes: 1 addition & 3 deletions pixels/generator/filters.py
Original file line number Diff line number Diff line change
@@ -25,10 +25,9 @@ def _make_mask_on_value(img, mask_value):
return mask_img


def order_tensor_on_masks(images, mask_value, max_images=12):
def order_tensor_on_masks(images: np.array, mask_value: float, max_images: int = 12):
"""
Order a set of images based on a mask count.

Parameters
----------
images : array
@@ -37,7 +36,6 @@ def order_tensor_on_masks(images, mask_value, max_images=12):
Value to create mask.
max_images : int
The maximum number of images to return

Returns
-------
image : numpy array
Loading