samples: NLP import and predict samples for SDK (#276)
* samples: NLP import and predict samples for SDK

* fix: mismatched region tags

* samples: add more import data samples

* samples: adds prediction NLP samples

* fix: lint

* samples: prediction tests

* samples: adds endpoint fixtures

* fix: lint

* Update all MBSDK Dataset sample test mocks, fix NLP sample tests

* Remove reference to ipdb

Co-authored-by: Vinny Senthil <vinnysenthil@gmail.com>
telpirion and vinnysenthil authored Apr 12, 2021
1 parent 38eb76f commit a1819b2
Showing 18 changed files with 666 additions and 33 deletions.
151 changes: 122 additions & 29 deletions samples/model-builder/conftest.py
@@ -25,48 +25,117 @@ def mock_sdk_init():
    yield mock


# ----------------------------------------------------------------------------
# Dataset Fixtures
# ----------------------------------------------------------------------------
"""
----------------------------------------------------------------------------
Dataset Fixtures
----------------------------------------------------------------------------
"""

"""Dataset objects returned by SomeDataset(), create(), import_data(), etc. """


@pytest.fixture
def mock_dataset():
    mock = MagicMock(aiplatform.datasets.Dataset)
def mock_image_dataset():
    mock = MagicMock(aiplatform.datasets.ImageDataset)
    yield mock


@pytest.fixture
def mock_new_dataset(mock_dataset):
    with patch.object(aiplatform.datasets.Dataset, "__new__") as mock_new_dataset:
        mock_new_dataset.return_value = mock_dataset
        yield mock_new_dataset
def mock_tabular_dataset():
    mock = MagicMock(aiplatform.datasets.TabularDataset)
    yield mock


@pytest.fixture
def mock_init_dataset(mock_new_dataset):
    with patch.object(aiplatform.datasets.Dataset, "__init__") as mock_init_dataset:
        mock_init_dataset.return_value = None
        yield mock_init_dataset
def mock_text_dataset():
    mock = MagicMock(aiplatform.datasets.TextDataset)
    yield mock


@pytest.fixture
def mock_create_dataset():
    with patch.object(aiplatform.datasets.Dataset, "create") as mock:
        mock.return_value = MagicMock(aiplatform.Dataset)
        yield mock
def mock_video_dataset():
    mock = MagicMock(aiplatform.datasets.VideoDataset)
    yield mock


"""Mocks for getting an existing Dataset, i.e. ds = aiplatform.ImageDataset(...) """


@pytest.fixture
def mock_get_image_dataset(mock_image_dataset):
with patch.object(aiplatform, "ImageDataset") as mock_get_image_dataset:
mock_get_image_dataset.return_value = mock_image_dataset
yield mock_get_image_dataset


@pytest.fixture
def mock_create_image_dataset():
    with patch.object(aiplatform.datasets.ImageDataset, "create") as mock:
        mock.return_value = MagicMock(aiplatform.Dataset)
def mock_get_tabular_dataset(mock_tabular_dataset):
    with patch.object(aiplatform, "TabularDataset") as mock_get_tabular_dataset:
        mock_get_tabular_dataset.return_value = mock_tabular_dataset
        yield mock_get_tabular_dataset


@pytest.fixture
def mock_get_text_dataset(mock_text_dataset):
with patch.object(aiplatform, "TextDataset") as mock_get_text_dataset:
mock_get_text_dataset.return_value = mock_text_dataset
yield mock_get_text_dataset


@pytest.fixture
def mock_get_video_dataset(mock_video_dataset):
with patch.object(aiplatform, "VideoDataset") as mock_get_video_dataset:
mock_get_video_dataset.return_value = mock_video_dataset
yield mock_get_video_dataset


"""Mocks for creating a new Dataset, i.e. aiplatform.ImageDataset.create(...) """


@pytest.fixture
def mock_create_image_dataset(mock_image_dataset):
    with patch.object(aiplatform.ImageDataset, "create") as mock_create_image_dataset:
        mock_create_image_dataset.return_value = mock_image_dataset
        yield mock_create_image_dataset


@pytest.fixture
def mock_create_tabular_dataset(mock_tabular_dataset):
    with patch.object(
        aiplatform.TabularDataset, "create"
    ) as mock_create_tabular_dataset:
        mock_create_tabular_dataset.return_value = mock_tabular_dataset
        yield mock_create_tabular_dataset


@pytest.fixture
def mock_create_text_dataset(mock_text_dataset):
    with patch.object(aiplatform.TextDataset, "create") as mock_create_text_dataset:
        mock_create_text_dataset.return_value = mock_text_dataset
        yield mock_create_text_dataset


@pytest.fixture
def mock_create_video_dataset(mock_video_dataset):
    with patch.object(aiplatform.VideoDataset, "create") as mock_create_video_dataset:
        mock_create_video_dataset.return_value = mock_video_dataset
        yield mock_create_video_dataset


"""Mocks for SomeDataset.import_data() """


@pytest.fixture
def mock_import_text_dataset(mock_text_dataset):
    with patch.object(mock_text_dataset, "import_data") as mock:
        yield mock


# ----------------------------------------------------------------------------
# TrainingJob Fixtures
# ----------------------------------------------------------------------------
"""
----------------------------------------------------------------------------
TrainingJob Fixtures
----------------------------------------------------------------------------
"""


@pytest.fixture
@@ -84,9 +153,11 @@ def mock_run_automl_image_training_job():
    yield mock


# ----------------------------------------------------------------------------
# Model Fixtures
# ----------------------------------------------------------------------------
"""
----------------------------------------------------------------------------
Model Fixtures
----------------------------------------------------------------------------
"""


@pytest.fixture
@@ -102,12 +173,34 @@ def mock_batch_predict_model():
    yield mock


# ----------------------------------------------------------------------------
# Job Fixtures
# ----------------------------------------------------------------------------
"""
----------------------------------------------------------------------------
Job Fixtures
----------------------------------------------------------------------------
"""


@pytest.fixture
def mock_create_batch_prediction_job():
    with patch.object(aiplatform.jobs.BatchPredictionJob, "create") as mock:
        yield mock


"""
----------------------------------------------------------------------------
Endpoint Fixtures
----------------------------------------------------------------------------
"""


@pytest.fixture
def mock_endpoint():
    mock = MagicMock(aiplatform.models.Endpoint)
    yield mock


@pytest.fixture
def mock_get_endpoint(mock_endpoint):
with patch.object(aiplatform, "Endpoint") as mock_get_endpoint:
mock_get_endpoint.return_value = mock_endpoint
yield mock_get_endpoint
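
The new Endpoint fixtures above patch the same calls a prediction sample makes. As a rough sketch (not part of this commit; the function name, endpoint ID, and instances are placeholders), the call pattern they intercept looks like this:

from google.cloud import aiplatform


def predict_text_sketch(project: str, location: str, endpoint_id: str, instances: list):
    # aiplatform.init and aiplatform.Endpoint are the calls patched by
    # mock_sdk_init and mock_get_endpoint; endpoint.predict is recorded on the
    # MagicMock returned by the mock_endpoint fixture.
    aiplatform.init(project=project, location=location)
    endpoint = aiplatform.Endpoint(endpoint_id)
    response = endpoint.predict(instances=instances)
    print(response.predictions)
    return response
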
43 changes: 43 additions & 0 deletions samples/model-builder/create_and_import_dataset_text_sample.py
@@ -0,0 +1,43 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List, Union

from google.cloud import aiplatform

# [START aiplatform_sdk_create_and_import_dataset_text_sample]
def create_and_import_dataset_text_sample(
    project: str,
    location: str,
    display_name: str,
    src_uris: Union[str, List[str]],
    sync: bool = True,
):
    aiplatform.init(project=project, location=location)

    ds = aiplatform.TextDataset.create(
        display_name=display_name,
        gcs_source=src_uris,
        import_schema_uri=aiplatform.schema.dataset.ioformat.text.single_label_classification,
        sync=sync,
    )

    ds.wait()

    print(ds.display_name)
    print(ds.resource_name)
    return ds


# [END aiplatform_sdk_create_and_import_dataset_text_sample]
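
A quick usage sketch for this sample (placeholder project, region, and Cloud Storage URI, not taken from this commit):

# Placeholder values; src_uris should point to a text-classification import file.
ds = create_and_import_dataset_text_sample(
    project="my-project",
    location="us-central1",
    display_name="my-text-dataset",
    src_uris="gs://my-bucket/import/text_classification_single_label.jsonl",
)
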
@@ -0,0 +1,39 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import test_constants as constants
import create_and_import_dataset_text_sample

from google.cloud.aiplatform import schema


def test_create_and_import_dataset_text_sample(mock_sdk_init, mock_create_text_dataset):

    create_and_import_dataset_text_sample.create_and_import_dataset_text_sample(
        project=constants.PROJECT,
        location=constants.LOCATION,
        src_uris=constants.GCS_SOURCES,
        display_name=constants.DISPLAY_NAME,
    )

    mock_sdk_init.assert_called_once_with(
        project=constants.PROJECT, location=constants.LOCATION
    )
    mock_create_text_dataset.assert_called_once_with(
        display_name=constants.DISPLAY_NAME,
        gcs_source=constants.GCS_SOURCES,
        import_schema_uri=schema.dataset.ioformat.text.single_label_classification,
        sync=True,
    )
@@ -32,7 +32,7 @@ def create_training_pipeline_image_classification_sample(

    job = aiplatform.AutoMLImageTrainingJob(display_name=display_name)

    my_image_ds = aiplatform.Dataset(dataset_id)
    my_image_ds = aiplatform.ImageDataset(dataset_id)

    model = job.run(
        dataset=my_image_ds,
@@ -18,12 +18,12 @@
import create_training_pipeline_image_classification_sample


@pytest.mark.usefixtures("mock_init_dataset")
def test_create_training_pipeline_image_classification_sample(
    mock_sdk_init,
    mock_image_dataset,
    mock_init_automl_image_training_job,
    mock_dataset,
    mock_run_automl_image_training_job,
    mock_get_image_dataset,
):

    create_training_pipeline_image_classification_sample.create_training_pipeline_image_classification_sample(
@@ -38,14 +38,16 @@ def test_create_training_pipeline_image_classification_sample(
        disable_early_stopping=False,
    )

    mock_get_image_dataset.assert_called_once_with(constants.RESOURCE_ID)

    mock_sdk_init.assert_called_once_with(
        project=constants.PROJECT, location=constants.LOCATION
    )
    mock_init_automl_image_training_job.assert_called_once_with(
        display_name=constants.DISPLAY_NAME
    )
    mock_run_automl_image_training_job.assert_called_once_with(
        dataset=mock_dataset,
        dataset=mock_image_dataset,
        model_display_name=constants.DISPLAY_NAME_2,
        training_fraction_split=constants.TRAINING_FRACTION_SPLIT,
        validation_fraction_split=constants.VALIDATION_FRACTION_SPLIT,
@@ -0,0 +1,43 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List, Union

from google.cloud import aiplatform

# [START aiplatform_sdk_import_data_text_classification_single_label_sample]
def import_data_text_classification_single_label(
    project: str,
    location: str,
    dataset: str,
    src_uris: Union[str, List[str]],
    sync: bool = True,
):
    aiplatform.init(project=project, location=location)

    ds = aiplatform.TextDataset(dataset)
    ds.import_data(
        gcs_source=src_uris,
        import_schema_uri=aiplatform.schema.dataset.ioformat.text.single_label_classification,
        sync=sync,
    )

    ds.wait()

    print(ds.display_name)
    print(ds.resource_name)
    return ds


# [END aiplatform_sdk_import_data_text_classification_single_label_sample]
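
A matching usage sketch (placeholder values; the dataset argument stands in for the numeric ID of an existing TextDataset):

# Placeholder values; "1234567890" stands in for an existing TextDataset ID.
ds = import_data_text_classification_single_label(
    project="my-project",
    location="us-central1",
    dataset="1234567890",
    src_uris=["gs://my-bucket/import/batch_1.jsonl", "gs://my-bucket/import/batch_2.jsonl"],
)
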
