From 9384cee5193a1a230b0424989c64d7e5a695e4e8 Mon Sep 17 00:00:00 2001 From: alafanechere Date: Wed, 20 Jul 2022 11:00:45 +0200 Subject: [PATCH 1/8] add utils funtion to retrieve all values for a key in a schema --- .../source_acceptance_test/utils/common.py | 18 ++++++++++- .../unit_tests/test_utils.py | 32 ++++++++++++++++++- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/common.py b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/common.py index 9caefefb90110..1c8c45db021c6 100644 --- a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/common.py +++ b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/common.py @@ -83,7 +83,10 @@ def find_key_inside_schema(schema_item: Union[dict, list, str], key: str = "$ref return item -def find_keyword_schema(schema: Union[dict, list, str], key: str) -> bool: +def find_keyword_schema( + schema: Union[dict, list, str], + key: str, +) -> bool: """Find at least one keyword in a schema, skip object properties""" def _find_keyword(schema, key, _skip=False): @@ -114,3 +117,16 @@ def load_yaml_or_json_path(path: Path): return load(file_data, Loader=Loader) else: raise RuntimeError("path must be a '.yaml' or '.json' file") + + +def find_all_values_for_key_in_schema(schema: dict, searched_key: str): + """Retrieve all (nested) values in a schema for a specific searched key""" + if isinstance(schema, list): + for schema_item in schema: + yield from find_all_values_for_key_in_schema(schema_item, searched_key) + if isinstance(schema, dict): + for key, value in schema.items(): + if key == searched_key: + yield value + if isinstance(value, dict) or isinstance(value, list): + yield from find_all_values_for_key_in_schema(value, searched_key) diff --git a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_utils.py b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_utils.py index 4ea8eabf3bc4b..78ec12e89126d 100644 --- a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_utils.py +++ b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_utils.py @@ -16,7 +16,7 @@ import pytest import yaml from docker.errors import ContainerError, NotFound -from source_acceptance_test.utils.common import load_yaml_or_json_path +from source_acceptance_test.utils.common import find_all_values_for_key_in_schema, load_yaml_or_json_path from source_acceptance_test.utils.compare import make_hashable from source_acceptance_test.utils.connector_runner import ConnectorRunner @@ -346,3 +346,33 @@ def test_load_other(self): with tempfile.NamedTemporaryFile("w", suffix=".txt") as f: with pytest.raises(RuntimeError): load_yaml_or_json_path(Path(f.name)) + + +@pytest.mark.parametrize( + "schema, searched_key, expected_values", + [ + ( + { + "looking_for_this_key": "first_match", + "foo": "bar", + "bar": {"looking_for_this_key": "second_match"}, + "top_level_list": [ + {"looking_for_this_key": "third_match"}, + {"looking_for_this_key": "fourth_match"}, + "dump_value", + {"nested_in_list": {"looking_for_this_key": "fifth_match"}}, + ], + }, + "looking_for_this_key", + ["first_match", "second_match", "third_match", "fourth_match", "fifth_match"], + ), + ({"foo": "bar", "bar": {"looking_for_this_key": "single_match"}}, "looking_for_this_key", ["single_match"]), + ( + ["foo", "bar", {"looking_for_this_key": "first_match"}, [{"looking_for_this_key": "second_match"}]], + "looking_for_this_key", + ["first_match", "second_match"], + ), + ], +) +def test_find_all_values_for_key_in_schema(schema, searched_key, expected_values): + assert list(find_all_values_for_key_in_schema(schema, searched_key)) == expected_values From 43256e1556ff8bc797d7109969ed836f4a050447 Mon Sep 17 00:00:00 2001 From: alafanechere Date: Wed, 20 Jul 2022 11:01:24 +0200 Subject: [PATCH 2/8] move does_not_raise to conftest --- .../bases/source-acceptance-test/unit_tests/conftest.py | 6 ++++++ .../bases/source-acceptance-test/unit_tests/test_core.py | 6 +----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/airbyte-integrations/bases/source-acceptance-test/unit_tests/conftest.py b/airbyte-integrations/bases/source-acceptance-test/unit_tests/conftest.py index b3daaa5f36ecf..b93933d966fd8 100644 --- a/airbyte-integrations/bases/source-acceptance-test/unit_tests/conftest.py +++ b/airbyte-integrations/bases/source-acceptance-test/unit_tests/conftest.py @@ -3,6 +3,7 @@ # import json +from contextlib import contextmanager import pytest @@ -17,3 +18,8 @@ def mssql_spec_schema(): def postgres_source_spec_schema(): with open("unit_tests/data/postgres_spec.json") as f: return json.load(f).get("connectionSpecification") + + +@contextmanager +def does_not_raise(): + yield diff --git a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_core.py b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_core.py index 35f2c9fb36619..5be34d27324e4 100644 --- a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_core.py +++ b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_core.py @@ -2,7 +2,6 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # -from contextlib import contextmanager from unittest.mock import MagicMock, patch import pytest @@ -23,10 +22,7 @@ from source_acceptance_test.tests.test_core import TestBasicRead as _TestBasicRead from source_acceptance_test.tests.test_core import TestDiscovery as _TestDiscovery - -@contextmanager -def does_not_raise(): - yield +from .conftest import does_not_raise @pytest.mark.parametrize( From 209bd95c5b0b76c0607c5020a7c64b406ce8eb2c Mon Sep 17 00:00:00 2001 From: alafanechere Date: Wed, 20 Jul 2022 11:03:35 +0200 Subject: [PATCH 3/8] add new test in SAT to check that all additionalProperties in spec are set to True + write unit test --- .../source_acceptance_test/tests/test_core.py | 17 ++++++++- .../unit_tests/test_spec.py | 38 +++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_core.py b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_core.py index eada433cafb91..15e36c7708379 100644 --- a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_core.py +++ b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_core.py @@ -29,7 +29,7 @@ from source_acceptance_test.base import BaseTest from source_acceptance_test.config import BasicReadTestConfig, ConnectionTestConfig from source_acceptance_test.utils import ConnectorRunner, SecretDict, filter_output, make_hashable, verify_records_schema -from source_acceptance_test.utils.common import find_key_inside_schema, find_keyword_schema +from source_acceptance_test.utils.common import find_all_values_for_key_in_schema, find_key_inside_schema, find_keyword_schema from source_acceptance_test.utils.json_schema_helper import JsonSchemaHelper, get_expected_schema_structure, get_object_structure @@ -164,6 +164,21 @@ def test_oauth_flow_parameters(self, actual_connector_spec: ConnectorSpecificati diff = params - schema_path assert diff == set(), f"Specified oauth fields are missed from spec schema: {diff}" + def test_additional_properties_is_true(self, actual_connector_spec): + """Check that value of the "additionalProperties" field is always true. + A spec declaring "additionalProperties": false introduces the risk of accidental breaking changes. + Specifically, when removing a property from the spec, existing connector configs will no longer be valid. + False value introduces the risk of accidental breaking changes. + Specifically, when removing a property from the spec, existing connector configs will no longer be valid. + Read https://github.com/airbytehq/airbyte/issues/14196 for more details""" + additional_properties_values = find_all_values_for_key_in_schema( + actual_connector_spec.connectionSpecification, "additionalProperties" + ) + if additional_properties_values: + assert all( + [additional_properties_value is True for additional_properties_value in additional_properties_values] + ), "When set additionalProperties field value must be true for backward compatibility." + @pytest.mark.default_timeout(30) class TestConnection(BaseTest): diff --git a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_spec.py b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_spec.py index ea8fe2d57dc9d..90d5a4b11a37c 100644 --- a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_spec.py +++ b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_spec.py @@ -8,6 +8,8 @@ from airbyte_cdk.models import ConnectorSpecification from source_acceptance_test.tests.test_core import TestSpec as _TestSpec +from .conftest import does_not_raise + @pytest.mark.parametrize( "connector_spec, should_fail", @@ -567,3 +569,39 @@ def test_validate_oauth_flow(connector_spec, expected_error): t.test_oauth_flow_parameters(connector_spec) else: t.test_oauth_flow_parameters(connector_spec) + + +@pytest.mark.parametrize( + "connector_spec, expectation", + [ + (ConnectorSpecification(connectionSpecification={}), does_not_raise()), + (ConnectorSpecification(connectionSpecification={"type": "object", "additionalProperties": True}), does_not_raise()), + (ConnectorSpecification(connectionSpecification={"type": "object", "additionalProperties": False}), pytest.raises(AssertionError)), + ( + ConnectorSpecification( + connectionSpecification={ + "type": "object", + "additionalProperties": True, + "properties": {"start_date": {"type": "object", "additionalProperties": "foo"}}, + } + ), + pytest.raises(AssertionError), + ), + ( + ConnectorSpecification( + connectionSpecification={ + "type": "object", + "additionalProperties": True, + "properties": { + "start_date": {"type": "object", "oneOf": [{"additionalProperties": True}, {"additionalProperties": False}]} + }, + } + ), + pytest.raises(AssertionError), + ), + ], +) +def test_additional_properties_is_true(connector_spec, expectation): + t = _TestSpec() + with expectation: + t.test_additional_properties_is_true(connector_spec) From 678712d7bf40d503dd1906deb8f1a9924ce5a1d8 Mon Sep 17 00:00:00 2001 From: alafanechere Date: Wed, 20 Jul 2022 14:35:15 +0200 Subject: [PATCH 4/8] clean test for spec --- .../bases/source-acceptance-test/unit_tests/test_spec.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_spec.py b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_spec.py index 90d5a4b11a37c..3a09ce07f887f 100644 --- a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_spec.py +++ b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_spec.py @@ -582,7 +582,7 @@ def test_validate_oauth_flow(connector_spec, expected_error): connectionSpecification={ "type": "object", "additionalProperties": True, - "properties": {"start_date": {"type": "object", "additionalProperties": "foo"}}, + "properties": {"my_object": {"type": "object", "additionalProperties": "foo"}}, } ), pytest.raises(AssertionError), @@ -593,7 +593,7 @@ def test_validate_oauth_flow(connector_spec, expected_error): "type": "object", "additionalProperties": True, "properties": { - "start_date": {"type": "object", "oneOf": [{"additionalProperties": True}, {"additionalProperties": False}]} + "my_oneOf_object": {"type": "object", "oneOf": [{"additionalProperties": True}, {"additionalProperties": False}]} }, } ), From 2581129239ed707b88fc83365a050ed5c65311d3 Mon Sep 17 00:00:00 2001 From: alafanechere Date: Wed, 20 Jul 2022 14:35:49 +0200 Subject: [PATCH 5/8] add same test for streams --- .../source_acceptance_test/tests/test_core.py | 16 ++++++- .../unit_tests/test_core.py | 46 +++++++++++++++++++ 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_core.py b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_core.py index 15e36c7708379..ea2359cdea7a3 100644 --- a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_core.py +++ b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_core.py @@ -169,7 +169,6 @@ def test_additional_properties_is_true(self, actual_connector_spec): A spec declaring "additionalProperties": false introduces the risk of accidental breaking changes. Specifically, when removing a property from the spec, existing connector configs will no longer be valid. False value introduces the risk of accidental breaking changes. - Specifically, when removing a property from the spec, existing connector configs will no longer be valid. Read https://github.com/airbytehq/airbyte/issues/14196 for more details""" additional_properties_values = find_all_values_for_key_in_schema( actual_connector_spec.connectionSpecification, "additionalProperties" @@ -177,7 +176,7 @@ def test_additional_properties_is_true(self, actual_connector_spec): if additional_properties_values: assert all( [additional_properties_value is True for additional_properties_value in additional_properties_values] - ), "When set additionalProperties field value must be true for backward compatibility." + ), "When set, additionalProperties field value must be true for backward compatibility." @pytest.mark.default_timeout(30) @@ -264,6 +263,19 @@ def test_streams_has_sync_modes(self, discovered_catalog: Mapping[str, Any]): assert stream.supported_sync_modes is not None, f"The stream {stream.name} is missing supported_sync_modes field declaration." assert len(stream.supported_sync_modes) > 0, f"supported_sync_modes list on stream {stream.name} should not be empty." + def test_additional_properties_is_true(self, discovered_catalog: Mapping[str, Any]): + """Check that value of the "additionalProperties" field is always true. + A stream schema declaring "additionalProperties": false introduces the risk of accidental breaking changes. + Specifically, when removing a property from the stream schema, existing connector catalog will no longer be valid. + False value introduces the risk of accidental breaking changes. + Read https://github.com/airbytehq/airbyte/issues/14196 for more details""" + for stream in discovered_catalog.values(): + additional_properties_values = list(find_all_values_for_key_in_schema(stream.json_schema, "additionalProperties")) + if additional_properties_values: + assert all( + [additional_properties_value is True for additional_properties_value in additional_properties_values] + ), "When set, additionalProperties field value must be true for backward compatibility." + def primary_keys_for_records(streams, records): streams_with_primary_key = [stream for stream in streams if stream.stream.source_defined_primary_key] diff --git a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_core.py b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_core.py index 5be34d27324e4..56b39d899564b 100644 --- a/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_core.py +++ b/airbyte-integrations/bases/source-acceptance-test/unit_tests/test_core.py @@ -151,6 +151,52 @@ def test_supported_sync_modes_in_stream(discovered_catalog, expectation): t.test_streams_has_sync_modes(discovered_catalog) +@pytest.mark.parametrize( + "discovered_catalog, expectation", + [ + ({"test_stream_1": AirbyteStream.parse_obj({"name": "test_stream_1", "json_schema": {}})}, does_not_raise()), + ( + {"test_stream_2": AirbyteStream.parse_obj({"name": "test_stream_2", "json_schema": {"additionalProperties": True}})}, + does_not_raise(), + ), + ( + {"test_stream_3": AirbyteStream.parse_obj({"name": "test_stream_3", "json_schema": {"additionalProperties": False}})}, + pytest.raises(AssertionError), + ), + ( + {"test_stream_4": AirbyteStream.parse_obj({"name": "test_stream_4", "json_schema": {"additionalProperties": "foo"}})}, + pytest.raises(AssertionError), + ), + ( + { + "test_stream_5": AirbyteStream.parse_obj( + { + "name": "test_stream_5", + "json_schema": {"additionalProperties": True, "properties": {"my_object": {"additionalProperties": True}}}, + } + ) + }, + does_not_raise(), + ), + ( + { + "test_stream_6": AirbyteStream.parse_obj( + { + "name": "test_stream_6", + "json_schema": {"additionalProperties": True, "properties": {"my_object": {"additionalProperties": False}}}, + } + ) + }, + pytest.raises(AssertionError), + ), + ], +) +def test_additional_properties_is_true(discovered_catalog, expectation): + t = _TestDiscovery() + with expectation: + t.test_additional_properties_is_true(discovered_catalog) + + @pytest.mark.parametrize( "schema, record, should_fail", [ From 331002c1ac06c87041572afb2527475543130f8a Mon Sep 17 00:00:00 2001 From: alafanechere Date: Wed, 20 Jul 2022 15:03:51 +0200 Subject: [PATCH 6/8] revert unrelated changes --- .../source_acceptance_test/utils/common.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/common.py b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/common.py index 1c8c45db021c6..36e003aa9f9fb 100644 --- a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/common.py +++ b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/common.py @@ -83,10 +83,7 @@ def find_key_inside_schema(schema_item: Union[dict, list, str], key: str = "$ref return item -def find_keyword_schema( - schema: Union[dict, list, str], - key: str, -) -> bool: +def find_keyword_schema(schema: Union[dict, list, str], key: str) -> bool: """Find at least one keyword in a schema, skip object properties""" def _find_keyword(schema, key, _skip=False): From edce556f83f155315991a926279dba332e9f9a48 Mon Sep 17 00:00:00 2001 From: alafanechere Date: Thu, 21 Jul 2022 10:31:08 +0200 Subject: [PATCH 7/8] replace find_key_inside_schema with find_all_values_for_key_in_schema --- .../source_acceptance_test/tests/test_core.py | 9 ++++----- .../source_acceptance_test/utils/common.py | 16 ---------------- 2 files changed, 4 insertions(+), 21 deletions(-) diff --git a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_core.py b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_core.py index ea2359cdea7a3..836301dbe2e01 100644 --- a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_core.py +++ b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_core.py @@ -29,7 +29,7 @@ from source_acceptance_test.base import BaseTest from source_acceptance_test.config import BasicReadTestConfig, ConnectionTestConfig from source_acceptance_test.utils import ConnectorRunner, SecretDict, filter_output, make_hashable, verify_records_schema -from source_acceptance_test.utils.common import find_all_values_for_key_in_schema, find_key_inside_schema, find_keyword_schema +from source_acceptance_test.utils.common import find_all_values_for_key_in_schema, find_keyword_schema from source_acceptance_test.utils.json_schema_helper import JsonSchemaHelper, get_expected_schema_structure, get_object_structure @@ -135,8 +135,7 @@ def test_secret_never_in_the_output(self): def test_defined_refs_exist_in_json_spec_file(self, connector_spec_dict: dict): """Checking for the presence of unresolved `$ref`s values within each json spec file""" - check_result = find_key_inside_schema(schema_item=connector_spec_dict) - + check_result = list(find_all_values_for_key_in_schema(connector_spec_dict, "$ref")) assert not check_result, "Found unresolved `$refs` value in spec.json file" def test_oauth_flow_parameters(self, actual_connector_spec: ConnectorSpecification): @@ -231,8 +230,8 @@ def test_defined_refs_exist_in_schema(self, discovered_catalog: Mapping[str, Any """Check the presence of unresolved `$ref`s values within each json schema.""" schemas_errors = [] for stream_name, stream in discovered_catalog.items(): - check_result = find_key_inside_schema(schema_item=stream.json_schema, key="$ref") - if check_result is not None: + check_result = list(find_all_values_for_key_in_schema(stream.json_schema, "$ref")) + if check_result: schemas_errors.append({stream_name: check_result}) assert not schemas_errors, f"Found unresolved `$refs` values for selected streams: {tuple(schemas_errors)}." diff --git a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/common.py b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/common.py index 36e003aa9f9fb..35fa082bea94c 100644 --- a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/common.py +++ b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/common.py @@ -67,22 +67,6 @@ def __repr__(self) -> str: return str(self) -def find_key_inside_schema(schema_item: Union[dict, list, str], key: str = "$ref") -> dict: - """Checking the incoming schema for the presence of a `$ref` object in it""" - if isinstance(schema_item, list): - for list_schema_item in schema_item: - item = find_key_inside_schema(list_schema_item, key) - if item is not None: - return item - elif isinstance(schema_item, dict): - if key in schema_item: - return schema_item - for schema_object_value in schema_item.values(): - item = find_key_inside_schema(schema_object_value, key) - if item is not None: - return item - - def find_keyword_schema(schema: Union[dict, list, str], key: str) -> bool: """Find at least one keyword in a schema, skip object properties""" From 1a022008d6441f81bff71a9064df2ed413bef8d7 Mon Sep 17 00:00:00 2001 From: alafanechere Date: Fri, 22 Jul 2022 14:26:22 +0200 Subject: [PATCH 8/8] bump --- airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md | 3 +++ airbyte-integrations/bases/source-acceptance-test/Dockerfile | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md b/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md index 3664750157600..02a0d3b990f95 100644 --- a/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md +++ b/airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 0.1.56 +Add test case in `TestDiscovery` and `TestConnection` to assert `additionalProperties` fields are set to true if they are declared [#14878](https://github.com/airbytehq/airbyte/pull/14878/). + ## 0.1.55 Add test case in `TestDiscovery` to assert `supported_sync_modes` stream field in catalog is set and not empty. diff --git a/airbyte-integrations/bases/source-acceptance-test/Dockerfile b/airbyte-integrations/bases/source-acceptance-test/Dockerfile index c56ba814a7203..5f5c2f2410343 100644 --- a/airbyte-integrations/bases/source-acceptance-test/Dockerfile +++ b/airbyte-integrations/bases/source-acceptance-test/Dockerfile @@ -33,7 +33,7 @@ COPY pytest.ini setup.py ./ COPY source_acceptance_test ./source_acceptance_test RUN pip install . -LABEL io.airbyte.version=0.1.55 +LABEL io.airbyte.version=0.1.56 LABEL io.airbyte.name=airbyte/source-acceptance-test ENTRYPOINT ["python", "-m", "pytest", "-p", "source_acceptance_test.plugin", "-r", "fEsx"]