Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SAT: check that previous config schema validates against current connector spec #15367

Merged
merged 11 commits into from
Aug 9, 2022
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## 0.1.60
Backward compatibility tests: validate fake previous config against current connector specification. [#15367](https://github.com/airbytehq/airbyte/pull/15367)

## 0.1.59
Backward compatibility tests: add syntactic validation of specs [#15194](https://github.com/airbytehq/airbyte/pull/15194/).

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ COPY pytest.ini setup.py ./
COPY source_acceptance_test ./source_acceptance_test
RUN pip install .

LABEL io.airbyte.version=0.1.59
LABEL io.airbyte.version=0.1.60
LABEL io.airbyte.name=airbyte/source-acceptance-test

ENTRYPOINT ["python", "-m", "pytest", "-p", "source_acceptance_test.plugin", "-r", "fEsx"]
4 changes: 3 additions & 1 deletion airbyte-integrations/bases/source-acceptance-test/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@
"jsonschema~=3.2.0",
"jsonref==0.2",
"deepdiff~=5.8.0",
"requests-mock",
"requests-mock~=1.9.3",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added this to freeze the version and avoid accidental breaking updates.

"pytest-mock~=3.6.1",
"hypothesis~=6.54.1",
"hypothesis-jsonschema~=0.20.1", # TODO alafanechere upgrade to latest when jsonschema lib is upgraded to >= 4.0.0 in airbyte-cdk and SAT
]

setuptools.setup(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from source_acceptance_test.base import BaseTest
from source_acceptance_test.config import BasicReadTestConfig, ConnectionTestConfig, SpecTestConfig
from source_acceptance_test.utils import ConnectorRunner, SecretDict, filter_output, make_hashable, verify_records_schema
from source_acceptance_test.utils.backward_compatibility import SpecDiffChecker, validate_previous_configs
from source_acceptance_test.utils.common import find_all_values_for_key_in_schema, find_keyword_schema
from source_acceptance_test.utils.json_schema_helper import JsonSchemaHelper, get_expected_schema_structure, get_object_structure

Expand All @@ -47,7 +48,12 @@ class TestSpec(BaseTest):

@staticmethod
def compute_spec_diff(actual_connector_spec: ConnectorSpecification, previous_connector_spec: ConnectorSpecification):
return DeepDiff(previous_connector_spec.dict(), actual_connector_spec.dict(), view="tree", ignore_order=True)
return DeepDiff(
previous_connector_spec.dict()["connectionSpecification"],
actual_connector_spec.dict()["connectionSpecification"],
view="tree",
ignore_order=True,
)
Comment on lines +51 to +56
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pedroslopez I changed this to only compute the diff on the connectionSpecification field of the spec. I think this is more robust as this is in this field that we want to perform the validation, not the other technical field that exists and might be changed.


@pytest.fixture(name="skip_backward_compatibility_tests")
def skip_backward_compatibility_tests_fixture(self, inputs: SpecTestConfig, previous_connector_docker_runner: ConnectorRunner) -> bool:
Expand All @@ -61,16 +67,6 @@ def skip_backward_compatibility_tests_fixture(self, inputs: SpecTestConfig, prev
pytest.skip(f"Backward compatibility tests are disabled for version {previous_connector_version}.")
return False

@pytest.fixture(name="spec_diff")
def spec_diff_fixture(
self,
skip_backward_compatibility_tests: bool,
actual_connector_spec: ConnectorSpecification,
previous_connector_spec: ConnectorSpecification,
) -> DeepDiff:
assert isinstance(actual_connector_spec, ConnectorSpecification) and isinstance(previous_connector_spec, ConnectorSpecification)
return self.compute_spec_diff(actual_connector_spec, previous_connector_spec)

def test_config_match_spec(self, actual_connector_spec: ConnectorSpecification, connector_config: SecretDict):
"""Check that config matches the actual schema from the spec call"""
# Getting rid of technical variables that start with an underscore
Expand Down Expand Up @@ -182,104 +178,22 @@ def test_oauth_flow_parameters(self, actual_connector_spec: ConnectorSpecificati

@pytest.mark.default_timeout(60)
@pytest.mark.backward_compatibility
def test_new_required_field_declaration(self, spec_diff: DeepDiff):
"""Check if a 'required' field was added to the spec."""
added_required_fields = [
addition for addition in spec_diff.get("dictionary_item_added", []) if addition.path(output_format="list")[-1] == "required"
]
assert len(added_required_fields) == 0, f"The current spec has a new required field: {spec_diff.pretty()}"

@pytest.mark.default_timeout(60)
@pytest.mark.backward_compatibility
def test_new_required_property(self, spec_diff: DeepDiff):
"""Check if a a new property was added to the 'required' field."""
added_required_properties = [
addition for addition in spec_diff.get("iterable_item_added", []) if addition.up.path(output_format="list")[-1] == "required"
]
assert len(added_required_properties) == 0, f"The current spec has a new required property: {spec_diff.pretty()}"

@pytest.mark.default_timeout(60)
@pytest.mark.backward_compatibility
def test_field_type_changed(self, spec_diff: DeepDiff):
"""Check if the current spec is changing the types of properties."""

common_error_message = f"The current spec changed the value of a 'type' field: {spec_diff.pretty()}"
# Detect type value change in case type field is declared as a string (e.g "str" -> "int"):
type_values_changed = [change for change in spec_diff.get("values_changed", []) if change.path(output_format="list")[-1] == "type"]

# Detect type value change in case type field is declared as a single item list (e.g ["str"] -> ["int"]):
type_values_changed_in_list = [
change for change in spec_diff.get("values_changed", []) if change.path(output_format="list")[-2] == "type"
]

assert len(type_values_changed_in_list) == 0 and len(type_values_changed) == 0, common_error_message

# Detect type value added to type list if new type value is not None (e.g ["str"] -> ["str", "int"]):
# It works because we compute the diff with 'ignore_order=True'
new_values_in_type_list = [ # noqa: F841
change
for change in spec_diff.get("iterable_item_added", [])
if change.path(output_format="list")[-2] == "type"
if change.t2 != "null"
]
# enable the assertion below if we want to disallow type expansion:
# assert len(new_values_in_type_list) == 0, common_error_message

# Detect the change of type of a type field
# e.g:
# - "str" -> ["str"] VALID
# - "str" -> ["str", "null"] VALID
# - "str" -> ["str", "int"] VALID
# - "str" -> 1 INVALID
# - ["str"] -> "str" VALID
# - ["str"] -> "int" INVALID
# - ["str"] -> 1 INVALID
type_changes = [change for change in spec_diff.get("type_changes", []) if change.path(output_format="list")[-1] == "type"]
for change in type_changes:
# We only accept change on the type field if the new type for this field is list or string
# This might be something already guaranteed by JSON schema validation.
if isinstance(change.t1, str):
assert isinstance(
change.t2, list
), f"The current spec change a type field from string to an invalid value: {spec_diff.pretty()}"
assert (
0 < len(change.t2) <= 2
), f"The current spec change a type field from string to an invalid value. The type list length should not be empty and have a maximum of two items {spec_diff.pretty()}."
# If the new type field is a list we want to make sure it only has the original type (t1) and null: e.g. "str" -> ["str", "null"]
# We want to raise an error otherwise.
t2_not_null_types = [_type for _type in change.t2 if _type != "null"]
assert (
len(t2_not_null_types) == 1 and t2_not_null_types[0] == change.t1
), "The current spec change a type field to a list with multiple invalid values."
if isinstance(change.t1, list):
assert isinstance(
change.t2, str
), f"The current spec change a type field from list to an invalid value: {spec_diff.pretty()}"
assert len(change.t1) == 1 and change.t2 == change.t1[0], f"The current spec narrowed a field type: {spec_diff.pretty()}"

# Detect when field was made not nullable but is still a list: e.g ["string", "null"] -> ["string"]
removed_nullable = [
change for change in spec_diff.get("iterable_item_removed", []) if change.path(output_format="list")[-2] == "type"
]
assert len(removed_nullable) == 0, f"The current spec narrowed a field type: {spec_diff.pretty()}"

@pytest.mark.default_timeout(60)
@pytest.mark.backward_compatibility
def test_enum_field_has_narrowed(self, spec_diff: DeepDiff):
"""Check if the list of values in a enum was shortened in a spec."""
removals = [
removal for removal in spec_diff.get("iterable_item_removed", []) if removal.up.path(output_format="list")[-1] == "enum"
]
assert len(removals) == 0, f"The current spec narrowed a field enum: {spec_diff.pretty()}"

@pytest.mark.default_timeout(60)
@pytest.mark.backward_compatibility
def test_new_enum_field_declaration(self, spec_diff: DeepDiff):
"""Check if an 'enum' field was added to the spec."""
added_enum_fields = [
addition for addition in spec_diff.get("dictionary_item_added", []) if addition.path(output_format="list")[-1] == "enum"
]
assert len(added_enum_fields) == 0, f"An 'enum' field was declared on an existing property of the spec: {spec_diff.pretty()}"
def test_backward_compatibility(
self,
skip_backward_compatibility_tests: bool,
actual_connector_spec: ConnectorSpecification,
previous_connector_spec: ConnectorSpecification,
number_of_configs_to_generate: int = 100,
):
"""Check if the current spec is backward_compatible:
1. Perform multiple hardcoded syntactic checks with SpecDiffChecker.
2. Validate fake generated previous configs against the actual connector specification with validate_previous_configs.
"""
assert isinstance(actual_connector_spec, ConnectorSpecification) and isinstance(previous_connector_spec, ConnectorSpecification)
spec_diff = self.compute_spec_diff(actual_connector_spec, previous_connector_spec)
Comment on lines +192 to +193
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we doing this here instead of using the spec_diff fixture?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I decided to discard spec_diff fixture for the moment because:

  • No other tests are using it.
  • The signature of test_backward_compatibility was changed to use actual_connector_spec and previous_connector_spec (required for validate_previous_configs). Using a spec_diff fixture was convenient to have a lighter signature on test_backward_compatibility that only required spec_diff fixture, but now that I added validate_previous_configs, I find it overkill to add a dedicated spec_diff fixture that takes the same inputs as the actual test.

checker = SpecDiffChecker(spec_diff)
checker.assert_spec_is_backward_compatible()
validate_previous_configs(previous_connector_spec, actual_connector_spec, number_of_configs_to_generate)

def test_additional_properties_is_true(self, actual_connector_spec: ConnectorSpecification):
"""Check that value of the "additionalProperties" field is always true.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#

import jsonschema
from airbyte_cdk.models import ConnectorSpecification
from deepdiff import DeepDiff
from hypothesis import given, settings
from hypothesis_jsonschema import from_schema
from source_acceptance_test.utils import SecretDict


class NonBackwardCompatibleSpecError(Exception):
pass


class SpecDiffChecker:
"""A class to perform multiple backward compatible checks on a spec diff"""

def __init__(self, diff: DeepDiff) -> None:
self._diff = diff

def assert_spec_is_backward_compatible(self):
self.check_if_declared_new_required_field()
self.check_if_added_a_new_required_property()
self.check_if_value_of_type_field_changed()
# self.check_if_new_type_was_added() We want to allow type expansion atm
self.check_if_type_of_type_field_changed()
self.check_if_field_was_made_not_nullable()
self.check_if_enum_was_narrowed()
self.check_if_declared_new_enum_field()

def _raise_error(self, message: str):
raise NonBackwardCompatibleSpecError(f"{message}: {self._diff.pretty()}")

def check_if_declared_new_required_field(self):
"""Check if the new spec declared a 'required' field."""
added_required_fields = [
addition for addition in self._diff.get("dictionary_item_added", []) if addition.path(output_format="list")[-1] == "required"
]
if added_required_fields:
self._raise_error("The current spec declared a new 'required' field")

def check_if_added_a_new_required_property(self):
"""Check if the new spec added a property to the 'required' list."""
added_required_properties = [
addition for addition in self._diff.get("iterable_item_added", []) if addition.up.path(output_format="list")[-1] == "required"
]
if added_required_properties:
self._raise_error("A new property was added to 'required'")

def check_if_value_of_type_field_changed(self):
"""Check if a type was changed"""
# Detect type value change in case type field is declared as a string (e.g "str" -> "int"):
type_values_changed = [change for change in self._diff.get("values_changed", []) if change.path(output_format="list")[-1] == "type"]

# Detect type value change in case type field is declared as a single item list (e.g ["str"] -> ["int"]):
type_values_changed_in_list = [
change for change in self._diff.get("values_changed", []) if change.path(output_format="list")[-2] == "type"
]
if type_values_changed or type_values_changed_in_list:
self._raise_error("The current spec changed the value of a 'type' field")

def check_if_new_type_was_added(self):
"""Detect type value added to type list if new type value is not None (e.g ["str"] -> ["str", "int"])"""
new_values_in_type_list = [
change
for change in self._diff.get("iterable_item_added", [])
if change.path(output_format="list")[-2] == "type"
if change.t2 != "null"
]
if new_values_in_type_list:
self._raise_error("The current spec changed the value of a 'type' field")

def check_if_type_of_type_field_changed(self):
"""
Detect the change of type of a type field
e.g:
- "str" -> ["str"] VALID
- "str" -> ["str", "null"] VALID
- "str" -> ["str", "int"] VALID
- "str" -> 1 INVALID
- ["str"] -> "str" VALID
- ["str"] -> "int" INVALID
- ["str"] -> 1 INVALID
"""
type_changes = [change for change in self._diff.get("type_changes", []) if change.path(output_format="list")[-1] == "type"]
for change in type_changes:
# We only accept change on the type field if the new type for this field is list or string
# This might be something already guaranteed by JSON schema validation.
if isinstance(change.t1, str):
if not isinstance(change.t2, list):
self._raise_error("The current spec change a type field from string to an invalid value.")
if not 0 < len(change.t2) <= 2:
self._raise_error(
"The current spec change a type field from string to an invalid value. The type list length should not be empty and have a maximum of two items."
)
# If the new type field is a list we want to make sure it only has the original type (t1) and null: e.g. "str" -> ["str", "null"]
# We want to raise an error otherwise.
t2_not_null_types = [_type for _type in change.t2 if _type != "null"]
if not (len(t2_not_null_types) == 1 and t2_not_null_types[0] == change.t1):
self._raise_error("The current spec change a type field to a list with multiple invalid values.")
if isinstance(change.t1, list):
if not isinstance(change.t2, str):
self._raise_error("The current spec change a type field from list to an invalid value.")
if not (len(change.t1) == 1 and change.t2 == change.t1[0]):
self._raise_error("The current spec narrowed a field type.")

def check_if_field_was_made_not_nullable(self):
"""Detect when field was made not nullable but is still a list: e.g ["string", "null"] -> ["string"]"""
removed_nullable = [
change for change in self._diff.get("iterable_item_removed", []) if change.path(output_format="list")[-2] == "type"
]
if removed_nullable:
self._raise_error("The current spec narrowed a field type or made a field not nullable.")

def check_if_enum_was_narrowed(self):
"""Check if the list of values in a enum was shortened in a spec."""
enum_removals = [
enum_removal
for enum_removal in self._diff.get("iterable_item_removed", [])
if enum_removal.up.path(output_format="list")[-1] == "enum"
]
if enum_removals:
self._raise_error("The current spec narrowed an enum field.")

def check_if_declared_new_enum_field(self):
"""Check if an 'enum' field was added to the spec."""
enum_additions = [
enum_addition
for enum_addition in self._diff.get("dictionary_item_added", [])
if enum_addition.path(output_format="list")[-1] == "enum"
]
if enum_additions:
self._raise_error("An 'enum' field was declared on an existing property of the spec.")


def validate_previous_configs(
previous_connector_spec: ConnectorSpecification, actual_connector_spec: ConnectorSpecification, number_of_configs_to_generate=100
):
"""Use hypothesis and hypothesis-jsonschema to run property based testing:
1. Generate fake previous config with the previous connector specification json schema.
2. Validate a fake previous config against the actual connector specification json schema."""

@given(from_schema(previous_connector_spec.dict()["connectionSpecification"]))
@settings(max_examples=number_of_configs_to_generate)
def check_fake_previous_config_against_actual_spec(fake_previous_config):
fake_previous_config = SecretDict(fake_previous_config)
filtered_fake_previous_config = {key: value for key, value in fake_previous_config.data.items() if not key.startswith("_")}
try:
jsonschema.validate(instance=filtered_fake_previous_config, schema=actual_connector_spec.connectionSpecification)
except jsonschema.exceptions.ValidationError as err:
raise NonBackwardCompatibleSpecError(err)

check_fake_previous_config_against_actual_spec()
Loading