Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SAT: all additionalProperties fields must be set to true #14878

Merged
merged 9 commits into from
Jul 22, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from source_acceptance_test.base import BaseTest
from source_acceptance_test.config import BasicReadTestConfig, ConnectionTestConfig
from source_acceptance_test.utils import ConnectorRunner, SecretDict, filter_output, make_hashable, verify_records_schema
from source_acceptance_test.utils.common import find_key_inside_schema, find_keyword_schema
from source_acceptance_test.utils.common import find_all_values_for_key_in_schema, find_key_inside_schema, find_keyword_schema
from source_acceptance_test.utils.json_schema_helper import JsonSchemaHelper, get_expected_schema_structure, get_object_structure


Expand Down Expand Up @@ -164,6 +164,20 @@ def test_oauth_flow_parameters(self, actual_connector_spec: ConnectorSpecificati
diff = params - schema_path
assert diff == set(), f"Specified oauth fields are missed from spec schema: {diff}"

def test_additional_properties_is_true(self, actual_connector_spec):
"""Check that value of the "additionalProperties" field is always true.
A spec declaring "additionalProperties": false introduces the risk of accidental breaking changes.
Specifically, when removing a property from the spec, existing connector configs will no longer be valid.
False value introduces the risk of accidental breaking changes.
Read https://github.com/airbytehq/airbyte/issues/14196 for more details"""
additional_properties_values = find_all_values_for_key_in_schema(
actual_connector_spec.connectionSpecification, "additionalProperties"
)
if additional_properties_values:
assert all(
[additional_properties_value is True for additional_properties_value in additional_properties_values]
), "When set, additionalProperties field value must be true for backward compatibility."


@pytest.mark.default_timeout(30)
class TestConnection(BaseTest):
Expand Down Expand Up @@ -249,6 +263,19 @@ def test_streams_has_sync_modes(self, discovered_catalog: Mapping[str, Any]):
assert stream.supported_sync_modes is not None, f"The stream {stream.name} is missing supported_sync_modes field declaration."
assert len(stream.supported_sync_modes) > 0, f"supported_sync_modes list on stream {stream.name} should not be empty."

def test_additional_properties_is_true(self, discovered_catalog: Mapping[str, Any]):
"""Check that value of the "additionalProperties" field is always true.
A stream schema declaring "additionalProperties": false introduces the risk of accidental breaking changes.
Specifically, when removing a property from the stream schema, existing connector catalog will no longer be valid.
False value introduces the risk of accidental breaking changes.
Read https://github.com/airbytehq/airbyte/issues/14196 for more details"""
for stream in discovered_catalog.values():
additional_properties_values = list(find_all_values_for_key_in_schema(stream.json_schema, "additionalProperties"))
if additional_properties_values:
assert all(
[additional_properties_value is True for additional_properties_value in additional_properties_values]
), "When set, additionalProperties field value must be true for backward compatibility."


def primary_keys_for_records(streams, records):
streams_with_primary_key = [stream for stream in streams if stream.stream.source_defined_primary_key]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,16 @@ def load_yaml_or_json_path(path: Path):
return load(file_data, Loader=Loader)
else:
raise RuntimeError("path must be a '.yaml' or '.json' file")


def find_all_values_for_key_in_schema(schema: dict, searched_key: str):
"""Retrieve all (nested) values in a schema for a specific searched key"""
if isinstance(schema, list):
for schema_item in schema:
yield from find_all_values_for_key_in_schema(schema_item, searched_key)
if isinstance(schema, dict):
for key, value in schema.items():
if key == searched_key:
yield value
if isinstance(value, dict) or isinstance(value, list):
yield from find_all_values_for_key_in_schema(value, searched_key)
Comment on lines +103 to +113
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not directly equivalent since this new method retrieves all values, but the existing find_key_inside_schema method can be used to find the first instance of a given key (e.g. find_key_inside_schema(schema, "additionalProperties") would return {"additionalProperties": False} or None if not found)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep I tried find_key_inside_schema, it's indeed working but only returns the first schema item having the searched key. I could have implemented a recursion in the final test function to use this function. But I preferred to use my new function to defer the recursion to the helper to boost the readability of the test. And this function can be helpful in the future if we have other keys we want to validate value on.

Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#

import json
from contextlib import contextmanager

import pytest

Expand All @@ -17,3 +18,8 @@ def mssql_spec_schema():
def postgres_source_spec_schema():
with open("unit_tests/data/postgres_spec.json") as f:
return json.load(f).get("connectionSpecification")


@contextmanager
def does_not_raise():
yield
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#

from contextlib import contextmanager
from unittest.mock import MagicMock, patch

import pytest
Expand All @@ -23,10 +22,7 @@
from source_acceptance_test.tests.test_core import TestBasicRead as _TestBasicRead
from source_acceptance_test.tests.test_core import TestDiscovery as _TestDiscovery


@contextmanager
def does_not_raise():
yield
from .conftest import does_not_raise


@pytest.mark.parametrize(
Expand Down Expand Up @@ -155,6 +151,52 @@ def test_supported_sync_modes_in_stream(discovered_catalog, expectation):
t.test_streams_has_sync_modes(discovered_catalog)


@pytest.mark.parametrize(
"discovered_catalog, expectation",
[
({"test_stream_1": AirbyteStream.parse_obj({"name": "test_stream_1", "json_schema": {}})}, does_not_raise()),
(
{"test_stream_2": AirbyteStream.parse_obj({"name": "test_stream_2", "json_schema": {"additionalProperties": True}})},
does_not_raise(),
),
(
{"test_stream_3": AirbyteStream.parse_obj({"name": "test_stream_3", "json_schema": {"additionalProperties": False}})},
pytest.raises(AssertionError),
),
(
{"test_stream_4": AirbyteStream.parse_obj({"name": "test_stream_4", "json_schema": {"additionalProperties": "foo"}})},
pytest.raises(AssertionError),
),
(
{
"test_stream_5": AirbyteStream.parse_obj(
{
"name": "test_stream_5",
"json_schema": {"additionalProperties": True, "properties": {"my_object": {"additionalProperties": True}}},
}
)
},
does_not_raise(),
),
(
{
"test_stream_6": AirbyteStream.parse_obj(
{
"name": "test_stream_6",
"json_schema": {"additionalProperties": True, "properties": {"my_object": {"additionalProperties": False}}},
}
)
},
pytest.raises(AssertionError),
),
],
)
def test_additional_properties_is_true(discovered_catalog, expectation):
t = _TestDiscovery()
with expectation:
t.test_additional_properties_is_true(discovered_catalog)


@pytest.mark.parametrize(
"schema, record, should_fail",
[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from airbyte_cdk.models import ConnectorSpecification
from source_acceptance_test.tests.test_core import TestSpec as _TestSpec

from .conftest import does_not_raise


@pytest.mark.parametrize(
"connector_spec, should_fail",
Expand Down Expand Up @@ -567,3 +569,39 @@ def test_validate_oauth_flow(connector_spec, expected_error):
t.test_oauth_flow_parameters(connector_spec)
else:
t.test_oauth_flow_parameters(connector_spec)


@pytest.mark.parametrize(
"connector_spec, expectation",
[
(ConnectorSpecification(connectionSpecification={}), does_not_raise()),
(ConnectorSpecification(connectionSpecification={"type": "object", "additionalProperties": True}), does_not_raise()),
(ConnectorSpecification(connectionSpecification={"type": "object", "additionalProperties": False}), pytest.raises(AssertionError)),
(
ConnectorSpecification(
connectionSpecification={
"type": "object",
"additionalProperties": True,
"properties": {"my_object": {"type": "object", "additionalProperties": "foo"}},
}
),
pytest.raises(AssertionError),
),
(
ConnectorSpecification(
connectionSpecification={
"type": "object",
"additionalProperties": True,
"properties": {
"my_oneOf_object": {"type": "object", "oneOf": [{"additionalProperties": True}, {"additionalProperties": False}]}
},
}
),
pytest.raises(AssertionError),
),
],
)
def test_additional_properties_is_true(connector_spec, expectation):
t = _TestSpec()
with expectation:
t.test_additional_properties_is_true(connector_spec)
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import pytest
import yaml
from docker.errors import ContainerError, NotFound
from source_acceptance_test.utils.common import load_yaml_or_json_path
from source_acceptance_test.utils.common import find_all_values_for_key_in_schema, load_yaml_or_json_path
from source_acceptance_test.utils.compare import make_hashable
from source_acceptance_test.utils.connector_runner import ConnectorRunner

Expand Down Expand Up @@ -346,3 +346,33 @@ def test_load_other(self):
with tempfile.NamedTemporaryFile("w", suffix=".txt") as f:
with pytest.raises(RuntimeError):
load_yaml_or_json_path(Path(f.name))


@pytest.mark.parametrize(
"schema, searched_key, expected_values",
[
(
{
"looking_for_this_key": "first_match",
"foo": "bar",
"bar": {"looking_for_this_key": "second_match"},
"top_level_list": [
{"looking_for_this_key": "third_match"},
{"looking_for_this_key": "fourth_match"},
"dump_value",
{"nested_in_list": {"looking_for_this_key": "fifth_match"}},
],
},
"looking_for_this_key",
["first_match", "second_match", "third_match", "fourth_match", "fifth_match"],
),
({"foo": "bar", "bar": {"looking_for_this_key": "single_match"}}, "looking_for_this_key", ["single_match"]),
(
["foo", "bar", {"looking_for_this_key": "first_match"}, [{"looking_for_this_key": "second_match"}]],
"looking_for_this_key",
["first_match", "second_match"],
),
],
)
def test_find_all_values_for_key_in_schema(schema, searched_key, expected_values):
assert list(find_all_values_for_key_in_schema(schema, searched_key)) == expected_values