diff --git a/RELEASE.md b/RELEASE.md index 262d8e3792..36fd39ea9f 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -4,6 +4,7 @@ ## Bug fixes and other changes * Updated error message for invalid catalog entries. +* Updated error message for catalog entries when the dataset class is not found with hints on how to resolve the issue. * Fixed a bug in the `DataCatalog` `shallow_copy()` method to ensure it returns the type of the used catalog and doesn't cast it to `DataCatalog`. ## Breaking changes to the API diff --git a/kedro/io/core.py b/kedro/io/core.py index ec1b25bfea..ff388a50ed 100644 --- a/kedro/io/core.py +++ b/kedro/io/core.py @@ -155,7 +155,7 @@ def from_config( except Exception as exc: raise DatasetError( f"An exception occurred when parsing config " - f"for dataset '{name}':\n{str(exc)}." + f"for dataset '{name}':\n{str(exc)}" ) from exc try: @@ -406,7 +406,23 @@ def parse_dataset_definition( class_obj = tmp break else: - raise DatasetError(f"Class '{dataset_type}' not found, is this a typo?") + hint = "" + if "DataSet" in dataset_type: + hint = ( # pragma: no cover # To remove when we drop support for python 3.8 + "Hint: If you are trying to use a dataset from `kedro-datasets`>=2.0.0, " + "make sure that the dataset name uses the `Dataset` spelling instead of `DataSet`." + ) + else: + hint = ( + "Hint: If you are trying to use a dataset from `kedro-datasets`, " + "make sure that the package is installed in your current environment. " + "You can do so by running `pip install kedro-datasets` or " + "`pip install kedro-datasets[<dataset-group>]` to install `kedro-datasets` along with " + "related dependencies for the specific dataset group." + ) + raise DatasetError( + f"Class '{dataset_type}' not found, is this a typo?" 
f"\n{hint}" + ) if not class_obj: class_obj = dataset_type diff --git a/tests/io/test_data_catalog.py b/tests/io/test_data_catalog.py index 26c3659cef..1deecb7a0f 100644 --- a/tests/io/test_data_catalog.py +++ b/tests/io/test_data_catalog.py @@ -1,5 +1,6 @@ import logging import re +import sys from copy import deepcopy from datetime import datetime, timezone from pathlib import Path @@ -503,7 +504,24 @@ def test_config_missing_class(self, sane_config): pattern = ( "An exception occurred when parsing config for dataset 'boats':\n" - "Class 'kedro.io.CSVDatasetInvalid' not found" + "Class 'kedro.io.CSVDatasetInvalid' not found, is this a typo?" + ) + with pytest.raises(DatasetError, match=re.escape(pattern)): + DataCatalog.from_config(**sane_config) + + @pytest.mark.skipif( + sys.version_info < (3, 9), + reason="for python 3.8 kedro-datasets version 1.8 is used which has the old spelling", + ) + def test_config_incorrect_spelling(self, sane_config): + """Check hint if the type uses the old DataSet spelling""" + sane_config["catalog"]["boats"]["type"] = "pandas.CSVDataSet" + + pattern = ( + "An exception occurred when parsing config for dataset 'boats':\n" + "Class 'pandas.CSVDataSet' not found, is this a typo?" + "\nHint: If you are trying to use a dataset from `kedro-datasets`>=2.0.0," + " make sure that the dataset name uses the `Dataset` spelling instead of `DataSet`." ) with pytest.raises(DatasetError, match=re.escape(pattern)): DataCatalog.from_config(**sane_config)