From 1efe2f010d8292a80774b92699145d77a5b44bcf Mon Sep 17 00:00:00 2001 From: brianjlai Date: Thu, 11 Aug 2022 02:35:23 -0700 Subject: [PATCH 01/28] draft: first pass at complete schema language generation and factory validator --- .../auth/declarative_authenticator.py | 12 ++ .../sources/declarative/auth/oauth.py | 5 +- .../sources/declarative/auth/token.py | 7 +- .../declarative/decoders/json_decoder.py | 3 +- .../declarative/extractors/record_filter.py | 4 +- .../sources/declarative/parsers/factory.py | 48 ++++- .../declarative/requesters/http_requester.py | 14 +- .../requesters/paginators/no_pagination.py | 3 +- .../stream_slicers/substream_slicer.py | 2 +- .../declarative/yaml_declarative_source.py | 90 ++++++++- .../sources/declarative/test_factory.py | 181 ++++++++++++++++-- .../declarative/transformations/testing.py | 162 ++++++++++++++++ 12 files changed, 498 insertions(+), 33 deletions(-) create mode 100644 airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py create mode 100644 airbyte-cdk/python/unit_tests/sources/declarative/transformations/testing.py diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py new file mode 100644 index 0000000000000..a90329c955500 --- /dev/null +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py @@ -0,0 +1,12 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+# +from abc import ABC +from dataclasses import dataclass + + +@dataclass +class DeclarativeAuthenticator(ABC): + """ + Interface used to associate which authenticators can be used as part of the declarative framework + """ diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/oauth.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/oauth.py index ff9d5ef8b104a..2446ba131ed76 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/oauth.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/oauth.py @@ -6,6 +6,7 @@ from typing import Any, List, Mapping, Optional, Union import pendulum +from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_oauth import AbstractOauth2Authenticator @@ -13,7 +14,7 @@ @dataclass -class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, JsonSchemaMixin): +class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAuthenticator, JsonSchemaMixin): """ Generates OAuth2.0 access tokens from an OAuth2.0 refresh token and client credentials based on a declarative connector configuration file. 
Credentials can be defined explicitly or via interpolation @@ -40,7 +41,7 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, JsonSchemaMixi options: InitVar[Mapping[str, Any]] scopes: Optional[List[str]] = None token_expiry_date: Optional[Union[InterpolatedString, str]] = None - _token_expiry_date: pendulum.DateTime = field(init=False, repr=False) + _token_expiry_date: pendulum.DateTime = field(init=False, repr=False, default=None) access_token_name: Union[InterpolatedString, str] = "access_token" expires_in_name: Union[InterpolatedString, str] = "expires_in" refresh_request_body: Optional[Mapping[str, Any]] = None diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/token.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/token.py index 04790ae9e3036..93c979a941926 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/token.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/token.py @@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass from typing import Any, Mapping, Union +from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.types import Config from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_token import AbstractHeaderAuthenticator @@ -13,7 +14,7 @@ @dataclass -class ApiKeyAuthenticator(AbstractHeaderAuthenticator, JsonSchemaMixin): +class ApiKeyAuthenticator(AbstractHeaderAuthenticator, DeclarativeAuthenticator, JsonSchemaMixin): """ ApiKeyAuth sets a request header on the HTTP requests sent. 
@@ -51,7 +52,7 @@ def token(self) -> str: @dataclass -class BearerAuthenticator(AbstractHeaderAuthenticator, JsonSchemaMixin): +class BearerAuthenticator(AbstractHeaderAuthenticator, DeclarativeAuthenticator, JsonSchemaMixin): """ Authenticator that sets the Authorization header on the HTTP requests sent. @@ -81,7 +82,7 @@ def token(self) -> str: @dataclass -class BasicHttpAuthenticator(AbstractHeaderAuthenticator): +class BasicHttpAuthenticator(AbstractHeaderAuthenticator, DeclarativeAuthenticator, JsonSchemaMixin): """ Builds auth based off the basic authentication scheme as defined by RFC 7617, which transmits credentials as USER ID/password pairs, encoded using bas64 https://developer.mozilla.org/en-US/docs/Web/HTTP/Authentication#basic_authentication_scheme diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/json_decoder.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/json_decoder.py index 0cea903656845..1d34b79cd4c16 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/json_decoder.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/json_decoder.py @@ -7,10 +7,11 @@ import requests from airbyte_cdk.sources.declarative.decoders.decoder import Decoder +from dataclasses_jsonschema import JsonSchemaMixin @dataclass -class JsonDecoder(Decoder): +class JsonDecoder(Decoder, JsonSchemaMixin): """ Decoder strategy that returns the json-encoded content of a response, if any. """ diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_filter.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_filter.py index 081dd75971300..36ad97c1aa7e7 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_filter.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_filter.py @@ -2,7 +2,7 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# -from dataclasses import InitVar, dataclass, field +from dataclasses import InitVar, dataclass from typing import Any, List, Mapping, Optional from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean @@ -20,7 +20,7 @@ class RecordFilter(JsonSchemaMixin): """ options: InitVar[Mapping[str, Any]] - config: Config = field(default=dict) + config: Config condition: str = "" def __post_init__(self, options: Mapping[str, Any]): diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py index 6303b05ca1f82..87ea447db6439 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py @@ -7,6 +7,7 @@ import copy import enum import importlib +from dataclasses import fields from typing import Any, List, Literal, Mapping, Type, Union, get_args, get_origin, get_type_hints from airbyte_cdk.sources.declarative.create_partial import OPTIONS_STR, create @@ -14,6 +15,7 @@ from airbyte_cdk.sources.declarative.parsers.class_types_registry import CLASS_TYPES_REGISTRY from airbyte_cdk.sources.declarative.parsers.default_implementation_registry import DEFAULT_IMPLEMENTATIONS_REGISTRY from airbyte_cdk.sources.declarative.types import Config +from jsonschema.validators import validate ComponentDefinition: Union[Literal, Mapping, List] @@ -96,8 +98,9 @@ class DeclarativeComponentFactory: """ - def __init__(self): + def __init__(self, instantiate: bool = True): self._interpolator = JinjaInterpolation() + self.instantiate = instantiate def create_component(self, component_definition: ComponentDefinition, config: Config): """ @@ -128,7 +131,29 @@ def build(self, class_or_class_name: Union[str, Type], config, **kwargs): kwargs[OPTIONS_STR] = {k: self._create_subcomponent(k, v, kwargs, config, class_) for k, v in kwargs[OPTIONS_STR].items()} updated_kwargs = {k: 
self._create_subcomponent(k, v, kwargs, config, class_) for k, v in kwargs.items()} - return create(class_, config=config, **updated_kwargs) + + if self.instantiate: + return create(class_, config=config, **updated_kwargs) + else: + # generate the schema for the current class (include a subcall to remap the interface to the a union) + self._transform_interface_to_union(class_) + schema = class_.json_schema() + + # Hack to properly override the schema to check enum type. Ideally would not do this sort of thing + # if transformed_schema is HttpRequester: + # schema['properties']['http_method']['anyOf'][1] = {"enum": [HttpMethod.GET, HttpMethod.POST]} + + # Validate against the concrete object as a result of invoking the create function + # component_func = create(class_, config=config, **updated_kwargs) + # component = component_func() + # validate(component.to_dict(), schema) + # return component_func + + # Validate using the component definition (not sure why it can't validate instances saying they're not objects) + component_definition = {**updated_kwargs, **{k: v for k, v in updated_kwargs[OPTIONS_STR].items() if k not in updated_kwargs}} + component_definition["config"] = config + # schema['type'] = 'dict' + validate(component_definition, schema) @staticmethod def _get_class_from_fully_qualified_class_name(class_name: str): @@ -238,3 +263,22 @@ def _is_builtin_type(cls) -> bool: if not cls: return False return cls.__module__ == "builtins" + + @staticmethod + # def _transform_interface_to_union(cls: type, cache: Mapping[type, List[type]]): + def _transform_interface_to_union(cls: type): + og_bases = cls.__bases__ + og_dict = dict(cls.__dict__) + copy_cls = type(cls.__name__, og_bases, og_dict) + # needed if we accidentally pull in nondeclarative interface implementers like legacy NoAuth (fixed now actually) + # if not dataclasses.is_dataclass(copy_cls): + # return copy_cls + class_fields = fields(copy_cls) + for field in class_fields: + some_field = field.type + 
module = some_field.__module__ + if module != "builtins" and module != "typing": + subclasses = some_field.__subclasses__() + if subclasses: + copy_cls.__annotations__[field.name] = Union[tuple(subclasses)] + # return copy_cls diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py index 4658e66c704f3..c74e6eaaeee73 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -7,6 +7,8 @@ from typing import Any, Mapping, MutableMapping, Optional, Union import requests +from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator +from airbyte_cdk.sources.declarative.auth.token import BasicHttpAuthenticator from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.requesters.error_handlers.default_error_handler import DefaultErrorHandler from airbyte_cdk.sources.declarative.requesters.error_handlers.error_handler import ErrorHandler @@ -14,10 +16,8 @@ from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_request_options_provider import ( InterpolatedRequestOptionsProvider, ) -from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import RequestOptionsProvider from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod, Requester from airbyte_cdk.sources.declarative.types import Config, StreamSlice, StreamState -from airbyte_cdk.sources.streams.http.auth import HttpAuthenticator, NoAuth from dataclasses_jsonschema import JsonSchemaMixin @@ -31,8 +31,8 @@ class HttpRequester(Requester, JsonSchemaMixin): url_base (InterpolatedString): Base url to send requests to path (InterpolatedString): Path to send requests to http_method (Union[str, 
HttpMethod]): HTTP method to use when sending requests - request_options_provider (Optional[RequestOptionsProvider]): request option provider defining the options to set on outgoing requests - authenticator (HttpAuthenticator): Authenticator defining how to authenticate to the source + request_options_provider (Optional[InterpolatedRequestOptionsProvider]): request option provider defining the options to set on outgoing requests + authenticator (DeclarativeAuthenticator): Authenticator defining how to authenticate to the source error_handler (Optional[ErrorHandler]): Error handler defining how to detect and handle errors config (Config): The user-provided configuration as specified by the source's spec """ @@ -43,8 +43,8 @@ class HttpRequester(Requester, JsonSchemaMixin): config: Config options: InitVar[Mapping[str, Any]] http_method: Union[str, HttpMethod] = HttpMethod.GET - request_options_provider: Optional[RequestOptionsProvider] = None - authenticator: HttpAuthenticator = None + request_options_provider: Optional[InterpolatedRequestOptionsProvider] = None + authenticator: DeclarativeAuthenticator = None error_handler: Optional[ErrorHandler] = None def __post_init__(self, options: Mapping[str, Any]): @@ -54,7 +54,7 @@ def __post_init__(self, options: Mapping[str, Any]): self._request_options_provider = InterpolatedRequestOptionsProvider(config=self.config, **self.request_options_provider) else: self._request_options_provider = self.request_options_provider - self.authenticator = self.authenticator or NoAuth() + self.authenticator = self.authenticator or BasicHttpAuthenticator("", config=self.config, options={}) if type(self.http_method) == str: self.http_method = HttpMethod[self.http_method] self._method = self.http_method diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py index 210b00c731236..9be1249eded79 100644 --- 
a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py @@ -8,10 +8,11 @@ import requests from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator from airbyte_cdk.sources.declarative.types import StreamSlice, StreamState +from dataclasses_jsonschema import JsonSchemaMixin @dataclass -class NoPagination(Paginator): +class NoPagination(Paginator, JsonSchemaMixin): """ Pagination implementation that never returns a next page. """ diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/substream_slicer.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/substream_slicer.py index d5b8b306b86dd..402e24027936d 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/substream_slicer.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/substream_slicer.py @@ -14,7 +14,7 @@ @dataclass -class ParentStreamConfig: +class ParentStreamConfig(JsonSchemaMixin): """ Describes how to create a stream slice from a parent stream diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py index bebecdfa2e2a3..34823e3de3d26 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py @@ -4,13 +4,27 @@ import json import logging -from typing import Any, List, Mapping +import typing +from dataclasses import dataclass, fields +from enum import EnumMeta +from typing import Any, List, Mapping, Union +from airbyte_cdk.sources.declarative.checks import CheckStream from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource 
+from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream from airbyte_cdk.sources.declarative.parsers.factory import DeclarativeComponentFactory from airbyte_cdk.sources.declarative.parsers.yaml_parser import YamlParser -from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod +from airbyte_cdk.sources.streams.core import Stream +from dataclasses_jsonschema import JsonSchemaMixin +from jsonschema.validators import validate + + +@dataclass +class ConcreteDeclarativeSource(JsonSchemaMixin): + checker: CheckStream + streams: List[DeclarativeStream] class YamlDeclarativeSource(DeclarativeSource): @@ -47,4 +61,74 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: def _read_and_parse_yaml_file(self, path_to_yaml_file): with open(path_to_yaml_file, "r") as f: config_content = f.read() - return YamlParser().parse(config_content) + # Add schema validation entry point here using the factory + parsed_config = YamlParser().parse(config_content) + + self._validate_source(parsed_config) + return parsed_config + + def _validate_source(self, parsed_config): + concrete_source = ConcreteDeclarativeSource( + checker=self._source_config["check"], + streams=[self._factory.create_component(stream_config, {})() for stream_config in self._source_config["streams"]], + ) + declarative_source_schema = ConcreteDeclarativeSource.json_schema() + validate(concrete_source, declarative_source_schema) + + @classmethod + def generate_schema(cls) -> str: + expanded_source_definition = cls.expand_schema_interfaces(ConcreteDeclarativeSource, {}) + expanded_schema = expanded_source_definition.json_schema() + return json.dumps(expanded_schema, cls=SchemaEncoder) + + # Seriously me right now + # https://i.kym-cdn.com/entries/icons/original/000/022/524/tumblr_o16n2kBlpX1ta3qyvo1_1280.jpg + @classmethod + def expand_schema_interfaces(cls, expand_class: type, cache: dict) -> type: + if 
expand_class.__name__ in cache: + return expand_class + + # We don't need to expand enums + if isinstance(expand_class, EnumMeta): + return expand_class + + # We can't parse CDK constructs past the declarative level + if expand_class.__name__ == "Stream" or expand_class.__name__ == "HttpStream": + return expand_class + + cache[expand_class.__name__] = expand_class + + copy_cls = type(expand_class.__name__, expand_class.__bases__, dict(expand_class.__dict__)) + class_fields = fields(copy_cls) + for field in class_fields: + unpacked_types = cls.unpack(field.type) + for field_type in unpacked_types: + module = field_type.__module__ + if module != "builtins" and module != "typing" and module != "pendulum.datetime": + # Also need to traverse down each objects fields + if field_type not in cache: + cls.expand_schema_interfaces(field_type, cache) + + subclasses = field_type.__subclasses__() + for subclass in subclasses: + cls.expand_schema_interfaces(subclass, cache) + if subclasses: + copy_cls.__annotations__[field.name] = Union[tuple(subclasses)] + return copy_cls + + # For components that are stored behind generics like List, Union, Optional, etc, we need to unpack the underlying type + @classmethod + def unpack(cls, field_type) -> typing.Tuple: + origin = typing.get_origin(field_type) + if origin == list or origin == Union: + return typing.get_args(field_type) + return (field_type,) + + +class SchemaEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, property): + return str(obj) + elif isinstance(obj, HttpMethod): + return str(obj) + return json.JSONEncoder.default(self, obj) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py index 190c448460475..175b8ac1104c0 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py @@ -30,6 +30,7 @@ from 
airbyte_cdk.sources.declarative.stream_slicers.list_stream_slicer import ListStreamSlicer from airbyte_cdk.sources.declarative.transformations import AddFields, RemoveFields from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition +from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource factory = DeclarativeComponentFactory() @@ -75,6 +76,11 @@ def test_interpolate_config(): interpolated_body_field: "{{ config['apikey'] }}" """ config = parser.parse(content) + + factory.instantiate = False + factory.create_component(config["authenticator"], input_config) + + factory.instantiate = True authenticator = factory.create_component(config["authenticator"], input_config)() assert authenticator.client_id.eval(input_config) == "some_client_id" assert authenticator.client_secret.string == "some_client_secret" @@ -94,6 +100,11 @@ def test_list_based_stream_slicer_with_values_refd(): cursor_field: repository """ config = parser.parse(content) + + factory.instantiate = False + factory.create_component(config["stream_slicer"], input_config) + + factory.instantiate = True stream_slicer = factory.create_component(config["stream_slicer"], input_config)() assert ["airbyte", "airbyte-cloud"] == stream_slicer.slice_values @@ -109,6 +120,11 @@ def test_list_based_stream_slicer_with_values_defined_in_config(): field_name: repository """ config = parser.parse(content) + + factory.instantiate = False + factory.create_component(config["stream_slicer"], input_config) + + factory.instantiate = True stream_slicer = factory.create_component(config["stream_slicer"], input_config)() assert ["airbyte", "airbyte-cloud"] == stream_slicer.slice_values assert stream_slicer.request_option.inject_into == RequestOptionType.header @@ -118,28 +134,29 @@ def test_list_based_stream_slicer_with_values_defined_in_config(): def test_create_substream_slicer(): content = """ schema_loader: - file_path: "./source_sendgrid/schemas/{{ 
options['stream_name'] }}.yaml" + file_path: "./source_sendgrid/schemas/{{ options['name'] }}.yaml" name: "{{ options['stream_name'] }}" retriever: requester: - name: "{{ options['stream_name'] }}" - path: "/v3" + name: "{{ options['name'] }}" + type: "HttpRequester" + path: "kek" record_selector: extractor: transform: "_" stream_A: type: DeclarativeStream $options: - stream_name: "A" - stream_primary_key: "id" + name: "A" + primary_key: "id" retriever: "*ref(retriever)" url_base: "https://airbyte.io" schema_loader: "*ref(schema_loader)" stream_B: type: DeclarativeStream $options: - stream_name: "B" - stream_primary_key: "id" + name: "B" + primary_key: "id" retriever: "*ref(retriever)" url_base: "https://airbyte.io" schema_loader: "*ref(schema_loader)" @@ -157,6 +174,11 @@ def test_create_substream_slicer(): stream_slice_field: word_id """ config = parser.parse(content) + + factory.instantiate = False + factory.create_component(config["stream_slicer"], input_config) + + factory.instantiate = True stream_slicer = factory.create_component(config["stream_slicer"], input_config)() parent_stream_configs = stream_slicer.parent_stream_configs assert len(parent_stream_configs) == 2 @@ -191,6 +213,12 @@ def test_create_cartesian_stream_slicer(): - "*ref(stream_slicer_B)" """ config = parser.parse(content) + + factory.instantiate = False + factory.create_component(config["stream_slicer"], input_config) + + factory.instantiate = True + stream_slicer = factory.create_component(config["stream_slicer"], input_config)() underlying_slicers = stream_slicer.stream_slicers assert len(underlying_slicers) == 2 @@ -220,6 +248,11 @@ def test_datetime_stream_slicer(): """ config = parser.parse(content) + + factory.instantiate = False + factory.create_component(config["stream_slicer"], input_config) + + factory.instantiate = True stream_slicer = factory.create_component(config["stream_slicer"], input_config)() assert type(stream_slicer) == DatetimeStreamSlicer assert 
stream_slicer._timezone == datetime.timezone.utc @@ -276,7 +309,7 @@ def test_full_config(): api_token: "{{ config['apikey'] }}" request_parameters_provider: "*ref(request_options_provider)" error_handler: - type: DefaultErrorHandler + type: NoPagination retriever: class_name: "airbyte_cdk.sources.declarative.retrievers.simple_retriever.SimpleRetriever" name: "{{ options['name'] }}" @@ -298,7 +331,7 @@ def test_full_config(): primary_key: "id" extractor: $ref: "*ref(extractor)" - transform: "_.result" + transform: "_.result" # <- change here for chaos retriever: $ref: "*ref(retriever)" requester: @@ -316,6 +349,10 @@ def test_full_config(): """ config = parser.parse(content) + factory.instantiate = False + factory.create_component(config["list_stream"], input_config) + + factory.instantiate = True stream_config = config["list_stream"] assert stream_config["class_name"] == "airbyte_cdk.sources.declarative.declarative_stream.DeclarativeStream" assert stream_config["cursor_field"] == [] @@ -361,6 +398,11 @@ def test_create_record_selector(): transform: "_.result" """ config = parser.parse(content) + + factory.instantiate = False + factory.create_component(config["selector"], input_config) + + factory.instantiate = True selector = factory.create_component(config["selector"], input_config)() assert isinstance(selector, RecordSelector) assert isinstance(selector.extractor, JelloExtractor) @@ -382,11 +424,16 @@ def test_create_requester(): password: "{{ config.apikey }}" request_options_provider: request_parameters: - page_size: 10 + a_parameter: "something_here" request_headers: header: header_value """ config = parser.parse(content) + + factory.instantiate = False + factory.create_component(config["requester"], input_config) + + factory.instantiate = True component = factory.create_component(config["requester"], input_config)() assert isinstance(component, HttpRequester) assert isinstance(component.error_handler, DefaultErrorHandler) @@ -396,7 +443,7 @@ def 
test_create_requester(): assert component.authenticator._username.eval(input_config) == "lists" assert component.authenticator._password.eval(input_config) == "verysecrettoken" assert component._method == HttpMethod.GET - assert component._request_options_provider._parameter_interpolator._interpolator.mapping["page_size"] == 10 + assert component._request_options_provider._parameter_interpolator._interpolator.mapping["a_parameter"] == "something_here" assert component._request_options_provider._headers_interpolator._interpolator.mapping["header"] == "header_value" assert component.name == "lists" @@ -414,6 +461,11 @@ def test_create_composite_error_handler(): action: RETRY """ config = parser.parse(content) + + factory.instantiate = False + factory.create_component(config["error_handler"], input_config) + + factory.instantiate = True component = factory.create_component(config["error_handler"], input_config)() assert len(component.error_handlers) == 2 assert isinstance(component.error_handlers[0], DefaultErrorHandler) @@ -461,6 +513,10 @@ def test_config_with_defaults(): """ config = parser.parse(content) + factory.instantiate = False + factory.create_component(config["lists_stream"], input_config) + + factory.instantiate = True stream_config = config["lists_stream"] stream = factory.create_component(stream_config, input_config)() assert type(stream) == DeclarativeStream @@ -496,6 +552,10 @@ def test_create_limit_paginator(): """ config = parser.parse(content) + factory.instantiate = False + factory.create_component(config["paginator"], input_config) + + factory.instantiate = True paginator_config = config["paginator"] paginator = factory.create_component(paginator_config, input_config)() assert isinstance(paginator, LimitPaginator) @@ -580,3 +640,102 @@ def test_add_fields(self): ) ] assert expected == component.transformations + + +def test_validation_wrong_input_type(): + content = """ + extractor: + type: JelloExtractor + transform: "_.result" + selector: + 
class_name: airbyte_cdk.sources.declarative.extractors.record_selector.RecordSelector + record_filter: + class_name: airbyte_cdk.sources.declarative.extractors.record_filter.RecordFilter + condition: "{{ record['id'] > stream_state['id'] }}" + extractor: + $ref: "*ref(extractor)" + transform: 408 + """ + config = parser.parse(content) + + factory.instantiate = False + factory.create_component(config["selector"], input_config) + + +def test_validation_type_missing_required_fields(): + content = """ + stream_slicer: + type: DatetimeStreamSlicer + $options: + datetime_format: "%Y-%m-%dT%H:%M:%S.%f%z" + start_datetime: + type: MinMaxDatetime + datetime: "{{ config['start_time'] }}" + min_datetime: "{{ config['start_time'] + day_delta(2) }}" + end_datetime: "{{ config['end_time'] }}" + step: "10d" + cursor_field: "created" + lookback_window: "5d" + start_time_option: + inject_into: request_parameter + field_name: created[gte] + """ + + config = parser.parse(content) + + factory.instantiate = False + factory.create_component(config["stream_slicer"], input_config) + + +# It's sort of a bother that this does not "actually" test the type, it just does a if it has fields that look like they type +# then it's a-okay by me... This sort of works, but isn't exactly what we want either since it's not that descriptive. 
Hence why this works, +# when it really shouldn't +def test_validation_wrong_object_type(): + content = """ + paginator: + type: "LimitPaginator" + page_size: 10 + url_base: "https://airbyte.io" + limit_option: + inject_into: request_parameter + field_name: page_size + page_token_option: + inject_into: path + pagination_strategy: + type: "CursorPagination" + cursor_value: "{{ response._metadata.next }}" + decoder: + type: "MinMaxDatetime" + datetime: "{{ response._metadata.next }}" + """ + config = parser.parse(content) + + factory.instantiate = False + factory.create_component(config["paginator"], input_config) + + +def test_validation_wrong_interface_type(): + content = """ + paginator: + type: "LimitPaginator" + page_size: 10 + url_base: "https://airbyte.io" + limit_option: + inject_into: request_parameter + field_name: page_size + page_token_option: + inject_into: path + pagination_strategy: + type: "MinMaxDatetime" + datetime: "{{ response._metadata.next }}" + """ + config = parser.parse(content) + + factory.instantiate = False + factory.create_component(config["paginator"], input_config) + + +def test_demo_schema_generation(): + # declarative_source = YamlDeclarativeSource("test_factory.py") + schema = YamlDeclarativeSource.generate_schema() + print("\n" + schema) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/transformations/testing.py b/airbyte-cdk/python/unit_tests/sources/declarative/transformations/testing.py new file mode 100644 index 0000000000000..c8544e87246b1 --- /dev/null +++ b/airbyte-cdk/python/unit_tests/sources/declarative/transformations/testing.py @@ -0,0 +1,162 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+# + +import dataclasses +import pprint +from dataclasses import dataclass, fields +from typing import Any, Mapping, Type, Union + +from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream +from airbyte_cdk.sources.declarative.decoders import JsonDecoder +from airbyte_cdk.sources.declarative.requesters import RequestOption +from airbyte_cdk.sources.declarative.requesters.paginators import LimitPaginator +from airbyte_cdk.sources.declarative.requesters.paginators.strategies import PageIncrement +from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType +from airbyte_cdk.sources.declarative.stream_slicers import SingleSlice +from dataclasses_jsonschema import JsonSchemaMixin +from jsonschema.validators import validate + + +@dataclass +class Interface(JsonSchemaMixin): + """This is an interface""" + + +count = 0 + + +@dataclass +class ChildInterfaceString(Interface, JsonSchemaMixin): + """ConcreteType1""" + + child_field: str + + +@dataclass +class ChildInt(Interface): + """ConcreteType2""" + + int_field: int + + +@dataclass +class ConcreteClass(JsonSchemaMixin): + number: int = 15 + + +@dataclass +class SomeOtherClass(JsonSchemaMixin): + """Outerobject containing interface""" + + f: int + + g: ConcreteClass + h: Interface + + +def test_json_schema(): + # copy the top level class (probably DeclarativeSource?) 
+ copy_cls = type("Copymixin", SomeOtherClass.__bases__, dict(SomeOtherClass.__dict__)) + + class_fields = fields(copy_cls) + # iterate over the fields + for field in class_fields: + t = field.type + subsclasses = t.__subclasses__() + # Only replace the type if there are subclasses and t is not in builtins + if subsclasses and t.__module__ != "builtins": + # replace the type with union of subclasses + field.type = Union[tuple(subsclasses)] + copy_cls.__dict__["__annotations__"][field.name] = Union[tuple(subsclasses)] + + json_schema = copy_cls.json_schema() + + pprint.pprint(json_schema) + assert "anyOf" in json_schema["properties"]["h"] + + +# make this a mixin +def test_pagination_strategy(): + # conc = LimitPaginator( + # page_size=100, + # limit_option=RequestOption + # ) + # og_bases = LimitPaginator.__bases__ + # og_dict = dict(LimitPaginator.__dict__) + # copy_name = LimitPaginator.__name__ + "Copy" + # copy_cls = type(LimitPaginator.__name__, og_bases, og_dict) + # class_fields = fields(copy_cls) + # for field in class_fields: + # some_field = field.type + # module = some_field.__module__ + # if module != "builtins" and module != "typing": + # subclasses = some_field.__subclasses__() + # if subclasses: + # copy_cls.__annotations__[field.name] = Union[tuple(subclasses)] + # print(subclasses) + copy_cls = replace_unions(LimitPaginator, dict()) + + schema = copy_cls.json_schema() + print(schema) + + # Test validate + + paginator = LimitPaginator( + page_size=100, + limit_option=RequestOption(inject_into=RequestOptionType.request_parameter, field_name="from", options={}), + page_token_option=RequestOption(inject_into=RequestOptionType.request_parameter, field_name="from", options={}), + pagination_strategy=PageIncrement(50, {}), + # pagination_strategy=MinMaxDatetime(datetime="0202020", options={}), + decoder=JsonDecoder( + tech_optional=SingleSlice({}), + options={}, + ), + config={}, + url_base="https://sample.api/v1/", + options={}, + ) + + 
validate(paginator.to_dict(), schema) + + print(DeclarativeStream.json_schema()) + + +def test_full_schema_print(): + unexpanded_schema = DeclarativeStream.json_schema() + + unionized_class = replace_unions(DeclarativeStream, dict()) + unionized_schema = unionized_class.json_schema() + + print(unexpanded_schema) + print(unionized_schema) + + +def test_bespoke(): + schema = DeclarativeStream.get_schema() + print(schema) + + +def replace_unions(current_class: Type, visited: Mapping[Type, Any]) -> Type: + if current_class in visited: + return current_class + visited[current_class] = True + + og_bases = current_class.__bases__ + og_dict = dict(current_class.__dict__) + copy_cls = type(current_class.__name__, og_bases, og_dict) + if not dataclasses.is_dataclass(copy_cls): + return copy_cls + class_fields = fields(copy_cls) + for field in class_fields: + some_field = field.type + module = some_field.__module__ + if module != "builtins" and module != "typing": + subclasses = some_field.__subclasses__() + if subclasses: + + for subclass in subclasses: + replace_unions(subclass, visited) + print(subclasses) + copy_cls.__annotations__[field.name] = Union[tuple(subclasses)] + return copy_cls From 5ca8e9e01a51919beec40d51be6257f544be9d9a Mon Sep 17 00:00:00 2001 From: brianjlai Date: Sun, 14 Aug 2022 02:35:37 -0700 Subject: [PATCH 02/28] actually a working validator and fixes to the schema that went uncaught --- .../sources/declarative/declarative_stream.py | 4 +- .../sources/declarative/parsers/factory.py | 113 ++++---- .../declarative/requesters/http_requester.py | 10 +- .../retrievers/simple_retriever.py | 4 +- .../stream_slicers/datetime_stream_slicer.py | 21 +- .../declarative/transformations/add_fields.py | 4 +- .../declarative/yaml_declarative_source.py | 97 ++----- .../sources/declarative/test_factory.py | 249 +++++++++++------- .../test_yaml_declarative_source.py | 158 ++++++++++- 9 files changed, 424 insertions(+), 236 deletions(-) diff --git 
a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py index 100a76a1035f1..7b87470091084 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py @@ -36,9 +36,9 @@ class DeclarativeStream(Stream, JsonSchemaMixin): config: Config options: InitVar[Mapping[str, Any]] name: str - _name: str = field(init=False, repr=False) + _name: str = field(init=False, repr=False, default="") primary_key: Optional[Union[str, List[str], List[List[str]]]] - _primary_key: str = field(init=False, repr=False) + _primary_key: str = field(init=False, repr=False, default="") stream_cursor_field: Optional[List[str]] = None transformations: List[RecordTransformation] = None checkpoint_interval: Optional[int] = None diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py index 592a0b7beb234..7bc9f6ca9374b 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py @@ -7,6 +7,7 @@ import copy import enum import importlib +import typing from dataclasses import fields from typing import Any, List, Literal, Mapping, Type, Union, get_args, get_origin, get_type_hints @@ -98,11 +99,10 @@ class DeclarativeComponentFactory: """ - def __init__(self, instantiate: bool = True): + def __init__(self): self._interpolator = JinjaInterpolation() - self.instantiate = instantiate - def create_component(self, component_definition: ComponentDefinition, config: Config): + def create_component(self, component_definition: ComponentDefinition, config: Config, instantiate: bool = True): """ Create a component defined by `component_definition`. 
@@ -118,9 +118,15 @@ def create_component(self, component_definition: ComponentDefinition, config: Co class_name = CLASS_TYPES_REGISTRY[kwargs.pop("type")] else: raise ValueError(f"Failed to create component because it has no class_name or type. Definition: {component_definition}") - return self.build(class_name, config, **kwargs) - - def build(self, class_or_class_name: Union[str, Type], config, **kwargs): + kwargs.pop("config", None) + return self.build( + class_name, + config, + instantiate, + **kwargs, + ) + + def build(self, class_or_class_name: Union[str, Type], config, instantiate: bool = True, **kwargs): if isinstance(class_or_class_name, str): class_ = self._get_class_from_fully_qualified_class_name(class_or_class_name) else: @@ -128,32 +134,27 @@ def build(self, class_or_class_name: Union[str, Type], config, **kwargs): # create components in options before propagating them if OPTIONS_STR in kwargs: - kwargs[OPTIONS_STR] = {k: self._create_subcomponent(k, v, kwargs, config, class_) for k, v in kwargs[OPTIONS_STR].items()} + kwargs[OPTIONS_STR] = { + k: self._create_subcomponent(k, v, kwargs, config, class_, instantiate) for k, v in kwargs[OPTIONS_STR].items() + } - updated_kwargs = {k: self._create_subcomponent(k, v, kwargs, config, class_) for k, v in kwargs.items()} + updated_kwargs = {k: self._create_subcomponent(k, v, kwargs, config, class_, instantiate) for k, v in kwargs.items()} - if self.instantiate: + if instantiate: return create(class_, config=config, **updated_kwargs) else: - # generate the schema for the current class (include a subcall to remap the interface to the a union) - self._transform_interface_to_union(class_) + # Because the component's data fields definitions use interfaces, we need to resolve the underlying types into the + # concrete classes that implement the interface before generating the schema + DeclarativeComponentFactory._transform_interface_to_union(class_) schema = class_.json_schema() - # Hack to properly override the 
schema to check enum type. Ideally would not do this sort of thing - # if transformed_schema is HttpRequester: - # schema['properties']['http_method']['anyOf'][1] = {"enum": [HttpMethod.GET, HttpMethod.POST]} - - # Validate against the concrete object as a result of invoking the create function - # component_func = create(class_, config=config, **updated_kwargs) - # component = component_func() - # validate(component.to_dict(), schema) - # return component_func - - # Validate using the component definition (not sure why it can't validate instances saying they're not objects) - component_definition = {**updated_kwargs, **{k: v for k, v in updated_kwargs[OPTIONS_STR].items() if k not in updated_kwargs}} - component_definition["config"] = config - # schema['type'] = 'dict' + component_definition = { + **updated_kwargs, + **{k: v for k, v in updated_kwargs.get(OPTIONS_STR, {}).items() if k not in updated_kwargs}, + "config": config, + } validate(component_definition, schema) + return lambda: component_definition @staticmethod def _get_class_from_fully_qualified_class_name(class_name: str): @@ -166,7 +167,7 @@ def _get_class_from_fully_qualified_class_name(class_name: str): def _merge_dicts(d1, d2): return {**d1, **d2} - def _create_subcomponent(self, key, definition, kwargs, config, parent_class): + def _create_subcomponent(self, key, definition, kwargs, config, parent_class, instantiate: bool = True): """ There are 5 ways to define a component. 1. 
dict with "class_name" field -> create an object of type "class_name" @@ -178,14 +179,14 @@ def _create_subcomponent(self, key, definition, kwargs, config, parent_class): if self.is_object_definition_with_class_name(definition): # propagate kwargs to inner objects definition[OPTIONS_STR] = self._merge_dicts(kwargs.get(OPTIONS_STR, dict()), definition.get(OPTIONS_STR, dict())) - return self.create_component(definition, config)() + return self.create_component(definition, config, instantiate)() elif self.is_object_definition_with_type(definition): # If type is set instead of class_name, get the class_name from the CLASS_TYPES_REGISTRY definition[OPTIONS_STR] = self._merge_dicts(kwargs.get(OPTIONS_STR, dict()), definition.get(OPTIONS_STR, dict())) object_type = definition.pop("type") class_name = CLASS_TYPES_REGISTRY[object_type] definition["class_name"] = class_name - return self.create_component(definition, config)() + return self.create_component(definition, config, instantiate)() elif isinstance(definition, dict): # Try to infer object type expected_type = self.get_default_type(key, parent_class) @@ -194,17 +195,22 @@ def _create_subcomponent(self, key, definition, kwargs, config, parent_class): if expected_type and not self._is_builtin_type(expected_type): definition["class_name"] = expected_type definition[OPTIONS_STR] = self._merge_dicts(kwargs.get(OPTIONS_STR, dict()), definition.get(OPTIONS_STR, dict())) - return self.create_component(definition, config)() + return self.create_component(definition, config, instantiate)() else: return definition elif isinstance(definition, list): return [ self._create_subcomponent( - key, sub, self._merge_dicts(kwargs.get(OPTIONS_STR, dict()), self._get_subcomponent_options(sub)), config, parent_class + key, + sub, + self._merge_dicts(kwargs.get(OPTIONS_STR, dict()), self._get_subcomponent_options(sub)), + config, + parent_class, + instantiate, ) for sub in definition ] - else: + elif instantiate: expected_type = 
self.get_default_type(key, parent_class) if expected_type and not isinstance(definition, expected_type): # call __init__(definition) if definition is not a dict and is not of the expected type @@ -218,8 +224,7 @@ def _create_subcomponent(self, key, definition, kwargs, config, parent_class): return expected_type(definition, options=options) except Exception as e: raise Exception(f"failed to instantiate type {expected_type}. {e}") - else: - return definition + return definition @staticmethod def is_object_definition_with_class_name(definition): @@ -265,20 +270,38 @@ def _is_builtin_type(cls) -> bool: return cls.__module__ == "builtins" @staticmethod - # def _transform_interface_to_union(cls: type, cache: Mapping[type, List[type]]): def _transform_interface_to_union(cls: type): - og_bases = cls.__bases__ - og_dict = dict(cls.__dict__) - copy_cls = type(cls.__name__, og_bases, og_dict) - # needed if we accidentally pull in nondeclarative interface implementers like legacy NoAuth (fixed now actually) - # if not dataclasses.is_dataclass(copy_cls): - # return copy_cls + copy_cls = type(cls.__name__ + "Copy", cls.__bases__, dict(cls.__dict__)) class_fields = fields(copy_cls) for field in class_fields: - some_field = field.type - module = some_field.__module__ + unpacked_field_types = DeclarativeComponentFactory.unpack(field.type) + copy_cls.__annotations__[field.name] = unpacked_field_types + + @staticmethod + def unpack(field_type: type): + """ + Recursive function that takes in a field type and unpacks the underlying fields (if it is a generic) or + returns the field type if it is not in a generic container + :param field_type: The current set of field types to unpack + :return: A list of unpacked types + """ + generic_type = typing.get_origin(field_type) + if generic_type is None: + # Functions as the base case since the origin is none for non-typing classes. 
If it is an interface then we derive + # and return the union of its subclasses or return the original type if it is a concrete class or a primitive type + module = field_type.__module__ if module != "builtins" and module != "typing": - subclasses = some_field.__subclasses__() + subclasses = field_type.__subclasses__() if subclasses: - copy_cls.__annotations__[field.name] = Union[tuple(subclasses)] - # return copy_cls + return Union[tuple(subclasses)] + return field_type + elif generic_type is list or generic_type is Union: + unpacked_types = [DeclarativeComponentFactory.unpack(underlying_type) for underlying_type in typing.get_args(field_type)] + if generic_type is list: + # For lists we extract the underlying list type and attempt to unpack it again since it could be another container + return List[Union[tuple(unpacked_types)]] + elif generic_type is Union: + # For Unions (and Options which evaluate into a Union of types and NoneType) we unpack the underlying type since it could + # be another container + return Union[tuple(unpacked_types)] + return field_type diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py index c74e6eaaeee73..14dadeab94149 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -28,8 +28,8 @@ class HttpRequester(Requester, JsonSchemaMixin): Attributes: name (str): Name of the stream. 
Only used for request/response caching - url_base (InterpolatedString): Base url to send requests to - path (InterpolatedString): Path to send requests to + url_base (Union[InterpolatedString, str]): Base url to send requests to + path (Union[InterpolatedString, str]): Path to send requests to http_method (Union[str, HttpMethod]): HTTP method to use when sending requests request_options_provider (Optional[InterpolatedRequestOptionsProvider]): request option provider defining the options to set on outgoing requests authenticator (DeclarativeAuthenticator): Authenticator defining how to authenticate to the source @@ -38,8 +38,8 @@ class HttpRequester(Requester, JsonSchemaMixin): """ name: str - url_base: InterpolatedString - path: InterpolatedString + url_base: Union[InterpolatedString, str] + path: Union[InterpolatedString, str] config: Config options: InitVar[Mapping[str, Any]] http_method: Union[str, HttpMethod] = HttpMethod.GET @@ -48,6 +48,8 @@ class HttpRequester(Requester, JsonSchemaMixin): error_handler: Optional[ErrorHandler] = None def __post_init__(self, options: Mapping[str, Any]): + self.url_base = InterpolatedString.create(self.url_base, options=options) + self.path = InterpolatedString.create(self.path, options=options) if self.request_options_provider is None: self._request_options_provider = InterpolatedRequestOptionsProvider(config=self.config, options=options) elif isinstance(self.request_options_provider, dict): diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py index d019c87b95c54..2eea3237daf25 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py @@ -48,9 +48,9 @@ class SimpleRetriever(Retriever, HttpStream, JsonSchemaMixin): record_selector: HttpSelector options: InitVar[Mapping[str, Any]] 
name: str - _name: str = field(init=False, repr=False) + _name: str = field(init=False, repr=False, default="") primary_key: Optional[Union[str, List[str], List[List[str]]]] - _primary_key: str = field(init=False, repr=False) + _primary_key: str = field(init=False, repr=False, default="") paginator: Optional[Paginator] = None stream_slicer: Optional[StreamSlicer] = SingleSlice(options={}) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py index ff08da789638e..3853755796770 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py @@ -5,7 +5,7 @@ import datetime import re from dataclasses import InitVar, dataclass, field -from typing import Any, Iterable, Mapping, Optional +from typing import Any, Iterable, Mapping, Optional, Union from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser @@ -40,10 +40,10 @@ class DatetimeStreamSlicer(StreamSlicer, JsonSchemaMixin): Full list of accepted format codes: https://man7.org/linux/man-pages/man3/strftime.3.html Attributes: - start_datetime (MinMaxDatetime): the datetime that determines the earliest record that should be synced - end_datetime (MinMaxDatetime): the datetime that determines the last record that should be synced + start_datetime (Union[MinMaxDatetime, str]): the datetime that determines the earliest record that should be synced + end_datetime (Union[MinMaxDatetime, str]): the datetime that determines the last record that should be synced step (str): size of the timewindow - cursor_field (InterpolatedString): record's cursor field + cursor_field (Union[InterpolatedString, str]): record's cursor field datetime_format (str): format of the datetime config 
(Config): connection config start_time_option (Optional[RequestOption]): request option for start time @@ -53,10 +53,10 @@ class DatetimeStreamSlicer(StreamSlicer, JsonSchemaMixin): lookback_window (Optional[InterpolatedString]): how many days before start_datetime to read data for """ - start_datetime: MinMaxDatetime - end_datetime: MinMaxDatetime + start_datetime: Union[MinMaxDatetime, str] + end_datetime: Union[MinMaxDatetime, str] step: str - cursor_field: InterpolatedString + cursor_field: Union[InterpolatedString, str] datetime_format: str config: Config options: InitVar[Mapping[str, Any]] @@ -66,11 +66,16 @@ class DatetimeStreamSlicer(StreamSlicer, JsonSchemaMixin): end_time_option: Optional[RequestOption] = None stream_state_field_start: Optional[str] = None stream_state_field_end: Optional[str] = None - lookback_window: Optional[InterpolatedString] = None + lookback_window: Optional[Union[InterpolatedString, str]] = None timedelta_regex = re.compile(r"((?P[\.\d]+?)w)?" r"((?P[\.\d]+?)d)?$") def __post_init__(self, options: Mapping[str, Any]): + if not isinstance(self.start_datetime, MinMaxDatetime): + self.start_datetime = MinMaxDatetime(self.start_datetime, options) + if not isinstance(self.end_datetime, MinMaxDatetime): + self.end_datetime = MinMaxDatetime(self.end_datetime, options) + self._timezone = datetime.timezone.utc self._interpolation = JinjaInterpolation() diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/add_fields.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/add_fields.py index 51ed5468acbd3..87098b1d9c2c2 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/add_fields.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/add_fields.py @@ -13,7 +13,7 @@ @dataclass(frozen=True) -class AddedFieldDefinition: +class AddedFieldDefinition(JsonSchemaMixin): """Defines the field to add on a record""" path: FieldPointer @@ -22,7 +22,7 @@ class 
AddedFieldDefinition: @dataclass(frozen=True) -class ParsedAddFieldDefinition: +class ParsedAddFieldDefinition(JsonSchemaMixin): """Defines the field to add on a record""" path: FieldPointer diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py index 5c7ac93b56bfa..8ebffb8bd4149 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py @@ -4,10 +4,8 @@ import json import logging -import typing -from dataclasses import dataclass, fields -from enum import EnumMeta -from typing import Any, List, Mapping, Union +from dataclasses import dataclass +from typing import Any, List, Mapping from airbyte_cdk.sources.declarative.checks import CheckStream from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker @@ -16,7 +14,6 @@ from airbyte_cdk.sources.declarative.exceptions import InvalidConnectorDefinitionException from airbyte_cdk.sources.declarative.parsers.factory import DeclarativeComponentFactory from airbyte_cdk.sources.declarative.parsers.yaml_parser import YamlParser -from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod from airbyte_cdk.sources.streams.core import Stream from dataclasses_jsonschema import JsonSchemaMixin from jsonschema.validators import validate @@ -42,6 +39,8 @@ def __init__(self, path_to_yaml): self._path_to_yaml = path_to_yaml self._source_config = self._read_and_parse_yaml_file(path_to_yaml) + self._validate_source() + # Stopgap to protect the top-level namespace until it's validated through the schema unknown_fields = [key for key in self._source_config.keys() if key not in self.VALID_TOP_LEVEL_FIELDS] if unknown_fields: @@ -59,84 +58,24 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: "parsed YAML into declarative source", 
extra={"path_to_yaml_file": self._path_to_yaml, "source_name": self.name, "parsed_config": json.dumps(self._source_config)}, ) - - stream_configs = self._source_config["streams"] - for s in stream_configs: - if "class_name" not in s: - s["class_name"] = "airbyte_cdk.sources.declarative.declarative_stream.DeclarativeStream" - return [self._factory.create_component(stream_config, config)() for stream_config in self._source_config["streams"]] + return [self._factory.create_component(stream_config, config, True)() for stream_config in self._stream_configs()] def _read_and_parse_yaml_file(self, path_to_yaml_file): with open(path_to_yaml_file, "r") as f: config_content = f.read() - # Add schema validation entry point here using the factory - parsed_config = YamlParser().parse(config_content) + return YamlParser().parse(config_content) - self._validate_source(parsed_config) - return parsed_config - - def _validate_source(self, parsed_config): - concrete_source = ConcreteDeclarativeSource( - checker=self._source_config["check"], - streams=[self._factory.create_component(stream_config, {})() for stream_config in self._source_config["streams"]], - ) + def _validate_source(self): + full_config = { + "checker": self._source_config["check"], + "streams": [self._factory.create_component(stream_config, {}, False)() for stream_config in self._stream_configs()], + } declarative_source_schema = ConcreteDeclarativeSource.json_schema() - validate(concrete_source, declarative_source_schema) - - @classmethod - def generate_schema(cls) -> str: - expanded_source_definition = cls.expand_schema_interfaces(ConcreteDeclarativeSource, {}) - expanded_schema = expanded_source_definition.json_schema() - return json.dumps(expanded_schema, cls=SchemaEncoder) - - # Seriously me right now - # https://i.kym-cdn.com/entries/icons/original/000/022/524/tumblr_o16n2kBlpX1ta3qyvo1_1280.jpg - @classmethod - def expand_schema_interfaces(cls, expand_class: type, cache: dict) -> type: - if 
expand_class.__name__ in cache: - return expand_class - - # We don't need to expand enums - if isinstance(expand_class, EnumMeta): - return expand_class + validate(full_config, declarative_source_schema) - # We can't parse CDK constructs past the declarative level - if expand_class.__name__ == "Stream" or expand_class.__name__ == "HttpStream": - return expand_class - - cache[expand_class.__name__] = expand_class - - copy_cls = type(expand_class.__name__, expand_class.__bases__, dict(expand_class.__dict__)) - class_fields = fields(copy_cls) - for field in class_fields: - unpacked_types = cls.unpack(field.type) - for field_type in unpacked_types: - module = field_type.__module__ - if module != "builtins" and module != "typing" and module != "pendulum.datetime": - # Also need to traverse down each objects fields - if field_type not in cache: - cls.expand_schema_interfaces(field_type, cache) - - subclasses = field_type.__subclasses__() - for subclass in subclasses: - cls.expand_schema_interfaces(subclass, cache) - if subclasses: - copy_cls.__annotations__[field.name] = Union[tuple(subclasses)] - return copy_cls - - # For components that are stored behind generics like List, Union, Optional, etc, we need to unpack the underlying type - @classmethod - def unpack(cls, field_type) -> typing.Tuple: - origin = typing.get_origin(field_type) - if origin == list or origin == Union: - return typing.get_args(field_type) - return (field_type,) - - -class SchemaEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, property): - return str(obj) - elif isinstance(obj, HttpMethod): - return str(obj) - return json.JSONEncoder.default(self, obj) + def _stream_configs(self): + stream_configs = self._source_config["streams"] + for s in stream_configs: + if "class_name" not in s: + s["class_name"] = "airbyte_cdk.sources.declarative.declarative_stream.DeclarativeStream" + return stream_configs diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py 
b/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py index e557e53ffb209..f6079fdaada48 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py @@ -3,7 +3,9 @@ # import datetime +from typing import List, Optional, Union +import pytest from airbyte_cdk.sources.declarative.auth.token import BasicHttpAuthenticator from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream @@ -14,6 +16,13 @@ from airbyte_cdk.sources.declarative.interpolation import InterpolatedString from airbyte_cdk.sources.declarative.parsers.factory import DeclarativeComponentFactory from airbyte_cdk.sources.declarative.parsers.yaml_parser import YamlParser +from airbyte_cdk.sources.declarative.requesters.error_handlers import BackoffStrategy +from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies import ( + ConstantBackoffStrategy, + ExponentialBackoffStrategy, + WaitTimeFromHeaderBackoffStrategy, + WaitUntilTimeFromHeaderBackoffStrategy, +) from airbyte_cdk.sources.declarative.requesters.error_handlers.composite_error_handler import CompositeErrorHandler from airbyte_cdk.sources.declarative.requesters.error_handlers.default_error_handler import DefaultErrorHandler from airbyte_cdk.sources.declarative.requesters.error_handlers.http_response_filter import HttpResponseFilter @@ -30,7 +39,7 @@ from airbyte_cdk.sources.declarative.stream_slicers.list_stream_slicer import ListStreamSlicer from airbyte_cdk.sources.declarative.transformations import AddFields, RemoveFields from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition -from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource +from jsonschema import ValidationError factory = DeclarativeComponentFactory() @@ -41,7 +50,7 @@ def 
test_factory(): content = """ - limit: 50 + limit: "50" offset_request_parameters: offset: "{{ next_page_token['offset'] }}" limit: "*ref(limit)" @@ -54,6 +63,9 @@ def test_factory(): body_offset: "{{ next_page_token['offset'] }}" """ config = parser.parse(content) + + factory.create_component(config["request_options"], input_config, False) + request_options_provider = factory.create_component(config["request_options"], input_config)() assert type(request_options_provider) == InterpolatedRequestOptionsProvider @@ -77,10 +89,8 @@ def test_interpolate_config(): """ config = parser.parse(content) - factory.instantiate = False - factory.create_component(config["authenticator"], input_config) + factory.create_component(config["authenticator"], input_config, False) - factory.instantiate = True authenticator = factory.create_component(config["authenticator"], input_config)() assert authenticator.client_id.eval(input_config) == "some_client_id" assert authenticator.client_secret.string == "some_client_secret" @@ -101,10 +111,8 @@ def test_list_based_stream_slicer_with_values_refd(): """ config = parser.parse(content) - factory.instantiate = False - factory.create_component(config["stream_slicer"], input_config) + factory.create_component(config["stream_slicer"], input_config, False) - factory.instantiate = True stream_slicer = factory.create_component(config["stream_slicer"], input_config)() assert ["airbyte", "airbyte-cloud"] == stream_slicer.slice_values @@ -121,10 +129,8 @@ def test_list_based_stream_slicer_with_values_defined_in_config(): """ config = parser.parse(content) - factory.instantiate = False - factory.create_component(config["stream_slicer"], input_config) + factory.create_component(config["stream_slicer"], input_config, False) - factory.instantiate = True stream_slicer = factory.create_component(config["stream_slicer"], input_config)() assert ["airbyte", "airbyte-cloud"] == stream_slicer.slice_values assert stream_slicer.request_option.inject_into == 
RequestOptionType.header @@ -175,10 +181,6 @@ def test_create_substream_slicer(): """ config = parser.parse(content) - factory.instantiate = False - factory.create_component(config["stream_slicer"], input_config) - - factory.instantiate = True stream_slicer = factory.create_component(config["stream_slicer"], input_config)() parent_stream_configs = stream_slicer.parent_stream_configs assert len(parent_stream_configs) == 2 @@ -214,10 +216,7 @@ def test_create_cartesian_stream_slicer(): """ config = parser.parse(content) - factory.instantiate = False - factory.create_component(config["stream_slicer"], input_config) - - factory.instantiate = True + factory.create_component(config["stream_slicer"], input_config, False) stream_slicer = factory.create_component(config["stream_slicer"], input_config)() underlying_slicers = stream_slicer.stream_slicers @@ -249,10 +248,8 @@ def test_datetime_stream_slicer(): config = parser.parse(content) - factory.instantiate = False - factory.create_component(config["stream_slicer"], input_config) + factory.create_component(config["stream_slicer"], input_config, False) - factory.instantiate = True stream_slicer = factory.create_component(config["stream_slicer"], input_config)() assert type(stream_slicer) == DatetimeStreamSlicer assert stream_slicer._timezone == datetime.timezone.utc @@ -349,10 +346,8 @@ def test_full_config(): """ config = parser.parse(content) - factory.instantiate = False - factory.create_component(config["list_stream"], input_config) + factory.create_component(config["list_stream"], input_config, False) - factory.instantiate = True stream_config = config["list_stream"] assert stream_config["class_name"] == "airbyte_cdk.sources.declarative.declarative_stream.DeclarativeStream" assert stream_config["cursor_field"] == [] @@ -398,10 +393,8 @@ def test_create_record_selector(): """ config = parser.parse(content) - factory.instantiate = False - factory.create_component(config["selector"], input_config) + 
factory.create_component(config["selector"], input_config, False) - factory.instantiate = True selector = factory.create_component(config["selector"], input_config)() assert isinstance(selector, RecordSelector) assert isinstance(selector.extractor, DpathExtractor) @@ -429,10 +422,8 @@ def test_create_requester(): """ config = parser.parse(content) - factory.instantiate = False - factory.create_component(config["requester"], input_config) + factory.create_component(config["requester"], input_config, False) - factory.instantiate = True component = factory.create_component(config["requester"], input_config)() assert isinstance(component, HttpRequester) assert isinstance(component.error_handler, DefaultErrorHandler) @@ -461,10 +452,8 @@ def test_create_composite_error_handler(): """ config = parser.parse(content) - factory.instantiate = False - factory.create_component(config["error_handler"], input_config) + factory.create_component(config["error_handler"], input_config, False) - factory.instantiate = True component = factory.create_component(config["error_handler"], input_config)() assert len(component.error_handlers) == 2 assert isinstance(component.error_handlers[0], DefaultErrorHandler) @@ -512,10 +501,8 @@ def test_config_with_defaults(): """ config = parser.parse(content) - factory.instantiate = False - factory.create_component(config["lists_stream"], input_config) + factory.create_component(config["lists_stream"], input_config, False) - factory.instantiate = True stream_config = config["lists_stream"] stream = factory.create_component(stream_config, input_config)() assert type(stream) == DeclarativeStream @@ -551,10 +538,8 @@ def test_create_limit_paginator(): """ config = parser.parse(content) - factory.instantiate = False - factory.create_component(config["paginator"], input_config) + factory.create_component(config["paginator"], input_config, False) - factory.instantiate = True paginator_config = config["paginator"] paginator = 
factory.create_component(paginator_config, input_config)() assert isinstance(paginator, LimitPaginator) @@ -591,6 +576,9 @@ def test_no_transformations(self): {self.base_options} """ config = parser.parse(content) + + factory.create_component(config["the_stream"], input_config, False) + component = factory.create_component(config["the_stream"], input_config)() assert isinstance(component, DeclarativeStream) assert [] == component.transformations @@ -608,6 +596,9 @@ def test_remove_fields(self): - ["path2"] """ config = parser.parse(content) + + factory.create_component(config["the_stream"], input_config, False) + component = factory.create_component(config["the_stream"], input_config)() assert isinstance(component, DeclarativeStream) expected = [RemoveFields(field_pointers=[["path", "to", "field1"], ["path2"]], options={})] @@ -626,6 +617,9 @@ def test_add_fields(self): value: "static_value" """ config = parser.parse(content) + + factory.create_component(config["the_stream"], input_config, False) + component = factory.create_component(config["the_stream"], input_config)() assert isinstance(component, DeclarativeStream) expected = [ @@ -644,8 +638,7 @@ def test_add_fields(self): def test_validation_wrong_input_type(): content = """ extractor: - type: JelloExtractor - transform: "_.result" + type: DpathExtractor selector: class_name: airbyte_cdk.sources.declarative.extractors.record_selector.RecordSelector record_filter: @@ -653,42 +646,74 @@ def test_validation_wrong_input_type(): condition: "{{ record['id'] > stream_state['id'] }}" extractor: $ref: "*ref(extractor)" - transform: 408 + field_pointer: 408 """ config = parser.parse(content) - - factory.instantiate = False - factory.create_component(config["selector"], input_config) + with pytest.raises(ValidationError): + factory.create_component(config["selector"], input_config, False) def test_validation_type_missing_required_fields(): content = """ stream_slicer: - type: DatetimeStreamSlicer - $options: - 
datetime_format: "%Y-%m-%dT%H:%M:%S.%f%z" - start_datetime: - type: MinMaxDatetime - datetime: "{{ config['start_time'] }}" - min_datetime: "{{ config['start_time'] + day_delta(2) }}" - end_datetime: "{{ config['end_time'] }}" - step: "10d" - cursor_field: "created" - lookback_window: "5d" - start_time_option: - inject_into: request_parameter - field_name: created[gte] + type: DatetimeStreamSlicer + $options: + datetime_format: "%Y-%m-%dT%H:%M:%S.%f%z" + start_datetime: + type: MinMaxDatetime + datetime: "{{ config['start_time'] }}" + min_datetime: "{{ config['start_time'] + day_delta(2) }}" + end_datetime: "{{ config['end_time'] }}" + cursor_field: "created" + lookback_window: "5d" + start_time_option: + inject_into: request_parameter + field_name: created[gte] """ config = parser.parse(content) + with pytest.raises(ValidationError): + factory.create_component(config["stream_slicer"], input_config, False) + + +def test_validation_wrong_interface_type(): + content = """ + paginator: + type: "LimitPaginator" + page_size: 10 + url_base: "https://airbyte.io" + limit_option: + inject_into: request_parameter + field_name: page_size + page_token_option: + inject_into: path + pagination_strategy: + type: "MinMaxDatetime" + datetime: "{{ response._metadata.next }}" + """ + config = parser.parse(content) + with pytest.raises(ValidationError): + factory.create_component(config["paginator"], input_config, False) - factory.instantiate = False - factory.create_component(config["stream_slicer"], input_config) +def test_validation_create_composite_error_handler(): + content = """ + error_handler: + type: "CompositeErrorHandler" + error_handlers: + - response_filters: + - predicate: "{{ 'code' in response }}" + action: RETRY + - response_filters: + - http_codes: [ 403 ] + """ + config = parser.parse(content) + with pytest.raises(ValidationError): + factory.create_component(config["error_handler"], input_config, False) -# It's sort of a bother that this does not "actually" test the 
type, it just does a if it has fields that look like they type -# then it's a-okay by me... This sort of works, but isn't exactly what we want either since it's not that descriptive. Hence why this works, -# when it really shouldn't + +# Leaving this test here to document a limitation of the validator. Decoder has no meaningful fields to validate on so it accepts +# the MinMaxDatetime despite being the wrong type def test_validation_wrong_object_type(): content = """ paginator: @@ -708,33 +733,77 @@ def test_validation_wrong_object_type(): datetime: "{{ response._metadata.next }}" """ config = parser.parse(content) - - factory.instantiate = False - factory.create_component(config["paginator"], input_config) + factory.create_component(config["paginator"], input_config, False) -def test_validation_wrong_interface_type(): +# This test should fail because the extractor doesn't match the Array of resolved classes. However, despite the schema being correct +# validation passes. Leaving this here to document it and revisit at another time. This is another validator limitation. 
+def test_validate_types_nested_in_list(): content = """ - paginator: - type: "LimitPaginator" - page_size: 10 - url_base: "https://airbyte.io" - limit_option: - inject_into: request_parameter - field_name: page_size - page_token_option: - inject_into: path - pagination_strategy: - type: "MinMaxDatetime" - datetime: "{{ response._metadata.next }}" + error_handler: + type: DefaultErrorHandler + backoff_strategies: + - type: DpathExtractor + field_pointer: ["result"] """ config = parser.parse(content) - - factory.instantiate = False - factory.create_component(config["paginator"], input_config) - - -def test_demo_schema_generation(): - # declarative_source = YamlDeclarativeSource("test_factory.py") - schema = YamlDeclarativeSource.generate_schema() - print("\n" + schema) + factory.create_component(config["error_handler"], input_config, False) + + +@pytest.mark.parametrize( + "test_name, input_type, expected_unpacked_types", + [ + ( + "test_unpacking_component_in_list", + List[BackoffStrategy], + List[ + Union[ + ConstantBackoffStrategy, + ExponentialBackoffStrategy, + WaitTimeFromHeaderBackoffStrategy, + WaitUntilTimeFromHeaderBackoffStrategy, + ] + ], + ), + ( + "test_unpacking_component_in_union", + Union[BackoffStrategy, RequestOption], + Union[ + ConstantBackoffStrategy, + ExponentialBackoffStrategy, + WaitTimeFromHeaderBackoffStrategy, + WaitUntilTimeFromHeaderBackoffStrategy, + RequestOption, + ], + ), + ( + "test_unpacking_component_in_optional", + Optional[BackoffStrategy], + Union[ + ConstantBackoffStrategy, + ExponentialBackoffStrategy, + WaitTimeFromHeaderBackoffStrategy, + WaitUntilTimeFromHeaderBackoffStrategy, + type(None), + ], + ), + ( + "test_unpacking_component_nested_in_multiple_types", + Optional[List[BackoffStrategy]], + Union[ + List[ + Union[ + ConstantBackoffStrategy, + ExponentialBackoffStrategy, + WaitTimeFromHeaderBackoffStrategy, + WaitUntilTimeFromHeaderBackoffStrategy, + ] + ], + type(None), + ], + ), + ], +) +def test_unpack(test_name, 
input_type, expected_unpacked_types): + actual_unpacked_types = DeclarativeComponentFactory.unpack(input_type) + assert actual_unpacked_types == expected_unpacked_types diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py index 91e3f710cb098..595b6cccde225 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py @@ -6,16 +6,53 @@ import tempfile import unittest +import pytest from airbyte_cdk.sources.declarative.exceptions import InvalidConnectorDefinitionException from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource +from jsonschema import ValidationError class TestYamlDeclarativeSource(unittest.TestCase): def test_source_is_created_if_toplevel_fields_are_known(self): content = """ version: "version" - streams: "streams" - check: "check" + definitions: + schema_loader: + name: "{{ options.stream_name }}" + file_path: "./source_sendgrid/schemas/{{ options.name }}.yaml" + retriever: + paginator: + type: "LimitPaginator" + page_size: 10 + limit_option: + inject_into: request_parameter + field_name: page_size + page_token_option: + inject_into: path + pagination_strategy: + type: "CursorPagination" + cursor_value: "{{ response._metadata.next }}" + requester: + path: "/v3/marketing/lists" + authenticator: + type: "BearerAuthenticator" + api_token: "{{ config.apikey }}" + request_parameters: + page_size: 10 + record_selector: + extractor: + field_pointer: ["result"] + streams: + - type: DeclarativeStream + $options: + name: "lists" + primary_key: id + url_base: "https://api.sendgrid.com" + schema_loader: "*ref(definitions.schema_loader)" + retriever: "*ref(definitions.retriever)" + check: + type: CheckStream + stream_names: ["lists"] """ temporary_file = TestFileContent(content) 
YamlDeclarativeSource(temporary_file.filename) @@ -23,14 +60,127 @@ def test_source_is_created_if_toplevel_fields_are_known(self): def test_source_is_not_created_if_toplevel_fields_are_unknown(self): content = """ version: "version" - streams: "streams" - check: "check" + definitions: + schema_loader: + name: "{{ options.stream_name }}" + file_path: "./source_sendgrid/schemas/{{ options.name }}.yaml" + retriever: + paginator: + type: "LimitPaginator" + page_size: 10 + limit_option: + inject_into: request_parameter + field_name: page_size + page_token_option: + inject_into: path + pagination_strategy: + type: "CursorPagination" + cursor_value: "{{ response._metadata.next }}" + requester: + path: "/v3/marketing/lists" + authenticator: + type: "BearerAuthenticator" + api_token: "{{ config.apikey }}" + request_parameters: + page_size: 10 + record_selector: + extractor: + field_pointer: ["result"] + streams: + - type: DeclarativeStream + $options: + name: "lists" + primary_key: id + url_base: "https://api.sendgrid.com" + schema_loader: "*ref(definitions.schema_loader)" + retriever: "*ref(definitions.retriever)" + check: + type: CheckStream + stream_names: ["lists"] not_a_valid_field: "error" """ temporary_file = TestFileContent(content) with self.assertRaises(InvalidConnectorDefinitionException): YamlDeclarativeSource(temporary_file.filename) + def test_source_missing_checker_fails_validation(self): + content = """ + version: "version" + definitions: + schema_loader: + name: "{{ options.stream_name }}" + file_path: "./source_sendgrid/schemas/{{ options.name }}.yaml" + retriever: + paginator: + type: "LimitPaginator" + page_size: 10 + limit_option: + inject_into: request_parameter + field_name: page_size + page_token_option: + inject_into: path + pagination_strategy: + type: "CursorPagination" + cursor_value: "{{ response._metadata.next }}" + requester: + path: "/v3/marketing/lists" + authenticator: + type: "BearerAuthenticator" + api_token: "{{ config.apikey }}" + 
request_parameters: + page_size: 10 + record_selector: + extractor: + field_pointer: ["result"] + streams: + - type: DeclarativeStream + $options: + name: "lists" + primary_key: id + url_base: "https://api.sendgrid.com" + schema_loader: "*ref(definitions.schema_loader)" + retriever: "*ref(definitions.retriever)" + check: + type: CheckStream + """ + temporary_file = TestFileContent(content) + with pytest.raises(ValidationError): + YamlDeclarativeSource(temporary_file.filename) + + def test_source_with_missing_streams_fails(self): + content = """ + version: "version" + definitions: + check: + type: CheckStream + stream_names: ["lists"] + """ + temporary_file = TestFileContent(content) + with pytest.raises(KeyError): + YamlDeclarativeSource(temporary_file.filename) + + def test_source_with_invalid_stream_config_fails_validation(self): + content = """ + version: "version" + definitions: + schema_loader: + name: "{{ options.stream_name }}" + file_path: "./source_sendgrid/schemas/{{ options.name }}.yaml" + streams: + - type: DeclarativeStream + $options: + name: "lists" + primary_key: id + url_base: "https://api.sendgrid.com" + schema_loader: "*ref(definitions.schema_loader)" + check: + type: CheckStream + stream_names: ["lists"] + """ + temporary_file = TestFileContent(content) + with pytest.raises(ValidationError): + YamlDeclarativeSource(temporary_file.filename) + class TestFileContent: def __init__(self, content): From 385a593bdd2a44020f8323397f2d97c64ad4cba6 Mon Sep 17 00:00:00 2001 From: brianjlai Date: Sun, 14 Aug 2022 02:39:26 -0700 Subject: [PATCH 03/28] remove extra spike file --- .../declarative/transformations/testing.py | 162 ------------------ 1 file changed, 162 deletions(-) delete mode 100644 airbyte-cdk/python/unit_tests/sources/declarative/transformations/testing.py diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/transformations/testing.py b/airbyte-cdk/python/unit_tests/sources/declarative/transformations/testing.py deleted file mode 
100644 index c8544e87246b1..0000000000000 --- a/airbyte-cdk/python/unit_tests/sources/declarative/transformations/testing.py +++ /dev/null @@ -1,162 +0,0 @@ -# -# Copyright (c) 2022 Airbyte, Inc., all rights reserved. -# - -import dataclasses -import pprint -from dataclasses import dataclass, fields -from typing import Any, Mapping, Type, Union - -from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream -from airbyte_cdk.sources.declarative.decoders import JsonDecoder -from airbyte_cdk.sources.declarative.requesters import RequestOption -from airbyte_cdk.sources.declarative.requesters.paginators import LimitPaginator -from airbyte_cdk.sources.declarative.requesters.paginators.strategies import PageIncrement -from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType -from airbyte_cdk.sources.declarative.stream_slicers import SingleSlice -from dataclasses_jsonschema import JsonSchemaMixin -from jsonschema.validators import validate - - -@dataclass -class Interface(JsonSchemaMixin): - """This is an interface""" - - -count = 0 - - -@dataclass -class ChildInterfaceString(Interface, JsonSchemaMixin): - """ConcreteType1""" - - child_field: str - - -@dataclass -class ChildInt(Interface): - """ConcreteType2""" - - int_field: int - - -@dataclass -class ConcreteClass(JsonSchemaMixin): - number: int = 15 - - -@dataclass -class SomeOtherClass(JsonSchemaMixin): - """Outerobject containing interface""" - - f: int - - g: ConcreteClass - h: Interface - - -def test_json_schema(): - # copy the top level class (probably DeclarativeSource?) 
- copy_cls = type("Copymixin", SomeOtherClass.__bases__, dict(SomeOtherClass.__dict__)) - - class_fields = fields(copy_cls) - # iterate over the fields - for field in class_fields: - t = field.type - subsclasses = t.__subclasses__() - # Only replace the type if there are subclasses and t is not in builtins - if subsclasses and t.__module__ != "builtins": - # replace the type with union of subclasses - field.type = Union[tuple(subsclasses)] - copy_cls.__dict__["__annotations__"][field.name] = Union[tuple(subsclasses)] - - json_schema = copy_cls.json_schema() - - pprint.pprint(json_schema) - assert "anyOf" in json_schema["properties"]["h"] - - -# make this a mixin -def test_pagination_strategy(): - # conc = LimitPaginator( - # page_size=100, - # limit_option=RequestOption - # ) - # og_bases = LimitPaginator.__bases__ - # og_dict = dict(LimitPaginator.__dict__) - # copy_name = LimitPaginator.__name__ + "Copy" - # copy_cls = type(LimitPaginator.__name__, og_bases, og_dict) - # class_fields = fields(copy_cls) - # for field in class_fields: - # some_field = field.type - # module = some_field.__module__ - # if module != "builtins" and module != "typing": - # subclasses = some_field.__subclasses__() - # if subclasses: - # copy_cls.__annotations__[field.name] = Union[tuple(subclasses)] - # print(subclasses) - copy_cls = replace_unions(LimitPaginator, dict()) - - schema = copy_cls.json_schema() - print(schema) - - # Test validate - - paginator = LimitPaginator( - page_size=100, - limit_option=RequestOption(inject_into=RequestOptionType.request_parameter, field_name="from", options={}), - page_token_option=RequestOption(inject_into=RequestOptionType.request_parameter, field_name="from", options={}), - pagination_strategy=PageIncrement(50, {}), - # pagination_strategy=MinMaxDatetime(datetime="0202020", options={}), - decoder=JsonDecoder( - tech_optional=SingleSlice({}), - options={}, - ), - config={}, - url_base="https://sample.api/v1/", - options={}, - ) - - 
validate(paginator.to_dict(), schema) - - print(DeclarativeStream.json_schema()) - - -def test_full_schema_print(): - unexpanded_schema = DeclarativeStream.json_schema() - - unionized_class = replace_unions(DeclarativeStream, dict()) - unionized_schema = unionized_class.json_schema() - - print(unexpanded_schema) - print(unionized_schema) - - -def test_bespoke(): - schema = DeclarativeStream.get_schema() - print(schema) - - -def replace_unions(current_class: Type, visited: Mapping[Type, Any]) -> Type: - if current_class in visited: - return current_class - visited[current_class] = True - - og_bases = current_class.__bases__ - og_dict = dict(current_class.__dict__) - copy_cls = type(current_class.__name__, og_bases, og_dict) - if not dataclasses.is_dataclass(copy_cls): - return copy_cls - class_fields = fields(copy_cls) - for field in class_fields: - some_field = field.type - module = some_field.__module__ - if module != "builtins" and module != "typing": - subclasses = some_field.__subclasses__() - if subclasses: - - for subclass in subclasses: - replace_unions(subclass, visited) - print(subclasses) - copy_cls.__annotations__[field.name] = Union[tuple(subclasses)] - return copy_cls From a29c048aea4dde79dd1c156f2e4220b61f969c3a Mon Sep 17 00:00:00 2001 From: brianjlai Date: Sun, 14 Aug 2022 03:07:47 -0700 Subject: [PATCH 04/28] fix formatting file --- .../sources/declarative/auth/declarative_authenticator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py index a90329c955500..615a8afbb9ace 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py @@ -1,6 +1,7 @@ # # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# + from abc import ABC from dataclasses import dataclass From 4be31d8c59c713eaa596f8ddc22eaa0b346987ef Mon Sep 17 00:00:00 2001 From: brianjlai Date: Sun, 14 Aug 2022 15:50:53 -0700 Subject: [PATCH 05/28] Add method to generate the complete JSON schema of the low code declarative language --- .../declarative/yaml_declarative_source.py | 78 ++++++++++++++++++- .../sources/declarative/test_factory.py | 6 ++ 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py index 8ebffb8bd4149..ead18c9dbea12 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py @@ -4,8 +4,10 @@ import json import logging -from dataclasses import dataclass -from typing import Any, List, Mapping +import typing +from dataclasses import dataclass, fields +from enum import EnumMeta +from typing import Any, List, Mapping, Union from airbyte_cdk.sources.declarative.checks import CheckStream from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker @@ -14,6 +16,7 @@ from airbyte_cdk.sources.declarative.exceptions import InvalidConnectorDefinitionException from airbyte_cdk.sources.declarative.parsers.factory import DeclarativeComponentFactory from airbyte_cdk.sources.declarative.parsers.yaml_parser import YamlParser +from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod from airbyte_cdk.sources.streams.core import Stream from dataclasses_jsonschema import JsonSchemaMixin from jsonschema.validators import validate @@ -79,3 +82,74 @@ def _stream_configs(self): if "class_name" not in s: s["class_name"] = "airbyte_cdk.sources.declarative.declarative_stream.DeclarativeStream" return stream_configs + + @staticmethod + def generate_schema() -> str: + expanded_source_definition = 
YamlDeclarativeSource.expand_schema_interfaces(ConcreteDeclarativeSource, {}) + expanded_schema = expanded_source_definition.json_schema() + return json.dumps(expanded_schema, cls=SchemaEncoder) + + @staticmethod + def expand_schema_interfaces(expand_class: type, visited: dict) -> type: + """ + Recursive function that takes in class type that will have its interface fields unpacked and expended and then recursively + attempt the same expansion on all the class' underlying fields that are declarative component. It also performs expansion + with respect to interfaces that are contained within generic data types. + :param expand_class: The declarative component class that will have its interface fields expanded + :param visited: cache used to store a record of already visited declarative classes that have already been seen + :return: The expanded declarative component + """ + + # Recursive base case to stop recursion if we have already expanded an interface in case of cyclical components + # like CompositeErrorHandler + if expand_class.__name__ in visited: + return visited[expand_class.__name__] + visited[expand_class.__name__] = expand_class + + next_classes = [] + copy_cls = type(expand_class.__name__, expand_class.__bases__, dict(expand_class.__dict__)) + class_fields = fields(copy_cls) + for field in class_fields: + unpacked_field_types = DeclarativeComponentFactory.unpack(field.type) + copy_cls.__annotations__[field.name] = unpacked_field_types + next_classes.extend(YamlDeclarativeSource._get_next_expand_classes(field.type)) + + for next_class in next_classes: + YamlDeclarativeSource.expand_schema_interfaces(next_class, visited) + return copy_cls + + @staticmethod + def _get_next_expand_classes(field_type) -> list[type]: + """ + Parses through a given field type and assembles a list of all underlying declarative components. For a concrete declarative class + it will return itself. For a declarative interface it will return its subclasses. 
For declarative components in a generic type + it will return the unpacked classes. Any non-declarative types will be skipped. + :param field_type: A field type that + :return: + """ + generic_type = typing.get_origin(field_type) + if generic_type is None: + module = field_type.__module__ + # We can only continue parsing declarative components since we explicitly inherit from the JsonSchemaMixin class which is + # used to generate the final json schema + if "airbyte_cdk.sources.declarative" in module and not isinstance(field_type, EnumMeta): + subclasses = field_type.__subclasses__() + if subclasses: + return subclasses + else: + return [field_type] + elif generic_type == list or generic_type == Union: + next_classes = [] + for underlying_type in typing.get_args(field_type): + next_classes.extend(YamlDeclarativeSource._get_next_expand_classes(underlying_type)) + return next_classes + return [] + + +class SchemaEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, property): + return str(obj) + elif isinstance(obj, HttpMethod): + return str(obj) + return json.JSONEncoder.default(self, obj) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py index f6079fdaada48..61d4406494803 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py @@ -39,6 +39,7 @@ from airbyte_cdk.sources.declarative.stream_slicers.list_stream_slicer import ListStreamSlicer from airbyte_cdk.sources.declarative.transformations import AddFields, RemoveFields from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition +from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource from jsonschema import ValidationError factory = DeclarativeComponentFactory() @@ -807,3 +808,8 @@ def test_validate_types_nested_in_list(): def 
test_unpack(test_name, input_type, expected_unpacked_types): actual_unpacked_types = DeclarativeComponentFactory.unpack(input_type) assert actual_unpacked_types == expected_unpacked_types + + +def test_complete_schema(): + schema = YamlDeclarativeSource.generate_schema() + print(schema) From 5a534767ab58e5b3c66843f52eddcf28ad7cec24 Mon Sep 17 00:00:00 2001 From: brianjlai Date: Mon, 15 Aug 2022 00:55:07 -0700 Subject: [PATCH 06/28] add testing of a few components during schema gen --- .../strategies/pagination_strategy.py | 3 +- .../test_yaml_declarative_source.py | 125 +++++++++++++++++- 2 files changed, 125 insertions(+), 3 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py index a2d9407a833dc..29cacb726483e 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py @@ -7,11 +7,10 @@ from typing import Any, List, Mapping, Optional import requests -from dataclasses_jsonschema import JsonSchemaMixin @dataclass -class PaginationStrategy(JsonSchemaMixin): +class PaginationStrategy: """ Defines how to get the next page token """ diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py index 595b6cccde225..3c675f9e8ed22 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py @@ -1,7 +1,7 @@ # # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# - +import json import os import tempfile import unittest @@ -198,3 +198,126 @@ def __enter__(self): def __exit__(self, type, value, traceback): os.unlink(self.filename) + + +def test_generate_schema(): + schema_str = YamlDeclarativeSource.generate_schema() + schema = json.loads(schema_str) + + assert schema["required"] == ["checker", "streams"] + assert schema["properties"]["checker"]["$ref"] == "#/definitions/CheckStream" + assert schema["properties"]["streams"]["items"]["$ref"] == "#/definitions/DeclarativeStream" + + check_stream = schema["definitions"]["CheckStream"] + assert check_stream["required"] == ["stream_names"] + assert check_stream["properties"]["stream_names"]["type"] == "array" + assert check_stream["properties"]["stream_names"]["items"]["type"] == "string" + + declarative_stream = schema["definitions"]["DeclarativeStream"] + assert declarative_stream["required"] == ["schema_loader", "retriever", "config"] + assert declarative_stream["properties"]["schema_loader"]["$ref"] == "#/definitions/JsonSchema" + assert declarative_stream["properties"]["retriever"]["$ref"] == "#/definitions/SimpleRetriever" + assert declarative_stream["properties"]["name"]["type"] == "string" + assert declarative_stream["properties"]["primary_key"]["anyOf"][0]["type"] == "array" + assert declarative_stream["properties"]["primary_key"]["anyOf"][0]["items"]["type"] == "string" + assert declarative_stream["properties"]["primary_key"]["anyOf"][1]["type"] == "array" + assert declarative_stream["properties"]["primary_key"]["anyOf"][1]["items"]["type"] == "array" + assert declarative_stream["properties"]["primary_key"]["anyOf"][1]["items"]["items"]["type"] == "string" + assert declarative_stream["properties"]["primary_key"]["anyOf"][2]["type"] == "string" + assert declarative_stream["properties"]["stream_cursor_field"]["type"] == "array" + assert declarative_stream["properties"]["stream_cursor_field"]["items"]["type"] == "string" + assert 
declarative_stream["properties"]["transformations"]["type"] == "array" + assert {"$ref": "#/definitions/AddFields"} in declarative_stream["properties"]["transformations"]["items"]["anyOf"] + assert {"$ref": "#/definitions/RemoveFields"} in declarative_stream["properties"]["transformations"]["items"]["anyOf"] + assert declarative_stream["properties"]["checkpoint_interval"]["type"] == "integer" + + simple_retriever = schema["definitions"]["SimpleRetriever"] + assert simple_retriever["required"] == ["requester", "record_selector"] + assert simple_retriever["properties"]["requester"]["$ref"] == "#/definitions/HttpRequester" + assert simple_retriever["properties"]["record_selector"]["$ref"] == "#/definitions/RecordSelector" + assert simple_retriever["properties"]["name"]["type"] == "string" + assert simple_retriever["properties"]["primary_key"]["anyOf"][0]["type"] == "array" + assert simple_retriever["properties"]["primary_key"]["anyOf"][0]["items"]["type"] == "string" + assert simple_retriever["properties"]["primary_key"]["anyOf"][1]["type"] == "array" + assert simple_retriever["properties"]["primary_key"]["anyOf"][1]["items"]["type"] == "array" + assert simple_retriever["properties"]["primary_key"]["anyOf"][1]["items"]["items"]["type"] == "string" + assert simple_retriever["properties"]["primary_key"]["anyOf"][2]["type"] == "string" + assert {"$ref": "#/definitions/LimitPaginator"} in simple_retriever["properties"]["paginator"]["anyOf"] + assert {"$ref": "#/definitions/NoPagination"} in simple_retriever["properties"]["paginator"]["anyOf"] + assert {"$ref": "#/definitions/CartesianProductStreamSlicer"} in simple_retriever["properties"]["stream_slicer"]["anyOf"] + assert {"$ref": "#/definitions/DatetimeStreamSlicer"} in simple_retriever["properties"]["stream_slicer"]["anyOf"] + assert {"$ref": "#/definitions/ListStreamSlicer"} in simple_retriever["properties"]["stream_slicer"]["anyOf"] + assert {"$ref": "#/definitions/SingleSlice"} in 
simple_retriever["properties"]["stream_slicer"]["anyOf"] + assert {"$ref": "#/definitions/SubstreamSlicer"} in simple_retriever["properties"]["stream_slicer"]["anyOf"] + + http_requester = schema["definitions"]["HttpRequester"] + assert http_requester["required"] == ["name", "url_base", "path", "config"] + assert http_requester["properties"]["name"]["type"] == "string" + assert {"$ref": "#/definitions/InterpolatedString"} in http_requester["properties"]["url_base"]["anyOf"] + assert {"type": "string"} in http_requester["properties"]["path"]["anyOf"] + assert {"$ref": "#/definitions/InterpolatedString"} in http_requester["properties"]["url_base"]["anyOf"] + assert {"type": "string"} in http_requester["properties"]["path"]["anyOf"] + assert http_requester["properties"]["http_method"]["anyOf"][0]["type"] == "string" + assert http_requester["properties"]["http_method"]["anyOf"][1]["type"] == "string" + assert "GET" in http_requester["properties"]["http_method"]["anyOf"][1]["enum"] + assert "POST" in http_requester["properties"]["http_method"]["anyOf"][1]["enum"] + assert http_requester["properties"]["request_options_provider"]["$ref"] == "#/definitions/InterpolatedRequestOptionsProvider" + assert {"$ref": "#/definitions/DeclarativeOauth2Authenticator"} in http_requester["properties"]["authenticator"]["anyOf"] + assert {"$ref": "#/definitions/ApiKeyAuthenticator"} in http_requester["properties"]["authenticator"]["anyOf"] + assert {"$ref": "#/definitions/BearerAuthenticator"} in http_requester["properties"]["authenticator"]["anyOf"] + assert {"$ref": "#/definitions/BasicHttpAuthenticator"} in http_requester["properties"]["authenticator"]["anyOf"] + assert {"$ref": "#/definitions/CompositeErrorHandler"} in http_requester["properties"]["error_handler"]["anyOf"] + assert {"$ref": "#/definitions/DefaultErrorHandler"} in http_requester["properties"]["error_handler"]["anyOf"] + + api_key_authenticator = schema["definitions"]["ApiKeyAuthenticator"] + assert 
api_key_authenticator["required"] == ["header", "api_token", "config"] + assert {"$ref": "#/definitions/InterpolatedString"} in api_key_authenticator["properties"]["header"]["anyOf"] + assert {"type": "string"} in api_key_authenticator["properties"]["header"]["anyOf"] + assert {"$ref": "#/definitions/InterpolatedString"} in api_key_authenticator["properties"]["api_token"]["anyOf"] + assert {"type": "string"} in api_key_authenticator["properties"]["api_token"]["anyOf"] + + default_error_handler = schema["definitions"]["DefaultErrorHandler"] + assert default_error_handler["properties"]["response_filters"]["type"] == "array" + assert default_error_handler["properties"]["response_filters"]["items"]["$ref"] == "#/definitions/HttpResponseFilter" + assert default_error_handler["properties"]["max_retries"]["type"] == "integer" + assert default_error_handler["properties"]["backoff_strategies"]["type"] == "array" + assert {"$ref": "#/definitions/ConstantBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"]["items"]["anyOf"] + assert {"$ref": "#/definitions/ExponentialBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"]["items"][ + "anyOf" + ] + assert {"$ref": "#/definitions/WaitTimeFromHeaderBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"][ + "items" + ]["anyOf"] + assert {"$ref": "#/definitions/WaitUntilTimeFromHeaderBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"][ + "items" + ]["anyOf"] + + exponential_backoff_strategy = schema["definitions"]["ExponentialBackoffStrategy"] + assert exponential_backoff_strategy["properties"]["factor"]["type"] == "number" + + limit_paginator = schema["definitions"]["LimitPaginator"] + assert limit_paginator["required"] == ["page_size", "limit_option", "page_token_option", "pagination_strategy", "config", "url_base"] + assert limit_paginator["properties"]["page_size"]["type"] == "integer" + assert 
limit_paginator["properties"]["limit_option"]["$ref"] == "#/definitions/RequestOption" + assert limit_paginator["properties"]["page_token_option"]["$ref"] == "#/definitions/RequestOption" + assert {"$ref": "#/definitions/CursorPaginationStrategy"} in limit_paginator["properties"]["pagination_strategy"]["anyOf"] + assert {"$ref": "#/definitions/OffsetIncrement"} in limit_paginator["properties"]["pagination_strategy"]["anyOf"] + assert {"$ref": "#/definitions/PageIncrement"} in limit_paginator["properties"]["pagination_strategy"]["anyOf"] + assert limit_paginator["properties"]["decoder"]["$ref"] == "#/definitions/JsonDecoder" + assert {"$ref": "#/definitions/InterpolatedString"} in http_requester["properties"]["url_base"]["anyOf"] + assert {"type": "string"} in http_requester["properties"]["path"]["anyOf"] + + cursor_pagination_stategy = schema["definitions"]["CursorPaginationStrategy"] + assert cursor_pagination_stategy["required"] == ["cursor_value", "config"] + assert {"$ref": "#/definitions/InterpolatedString"} in cursor_pagination_stategy["properties"]["cursor_value"]["anyOf"] + assert {"type": "string"} in cursor_pagination_stategy["properties"]["cursor_value"]["anyOf"] + assert cursor_pagination_stategy["properties"]["stop_condition"]["$ref"] == "#/definitions/InterpolatedBoolean" + assert cursor_pagination_stategy["properties"]["decoder"]["$ref"] == "#/definitions/JsonDecoder" + + list_stream_slicer = schema["definitions"]["ListStreamSlicer"] + assert list_stream_slicer["required"] == ["slice_values", "cursor_field", "config"] + assert {"type": "array", "items": {"type": "string"}} in list_stream_slicer["properties"]["slice_values"]["anyOf"] + assert {"type": "string"} in list_stream_slicer["properties"]["slice_values"]["anyOf"] + assert {"$ref": "#/definitions/InterpolatedString"} in list_stream_slicer["properties"]["cursor_field"]["anyOf"] + assert {"type": "string"} in list_stream_slicer["properties"]["cursor_field"]["anyOf"] + assert 
list_stream_slicer["properties"]["request_option"]["$ref"] == "#/definitions/RequestOption" From 41eea302027af779d6ae9cd85a2569d4f5bdbb48 Mon Sep 17 00:00:00 2001 From: brianjlai Date: Mon, 15 Aug 2022 22:29:12 -0700 Subject: [PATCH 07/28] pr feedback and a little bit of refactoring --- .../auth/declarative_authenticator.py | 15 +++++- .../sources/declarative/parsers/factory.py | 15 +++--- .../declarative/requesters/http_requester.py | 5 +- .../declarative/yaml_declarative_source.py | 15 ++++-- .../test_yaml_declarative_source.py | 46 ++++++++++++++++++- 5 files changed, 78 insertions(+), 18 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py index 615a8afbb9ace..73810ae32361d 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py @@ -2,12 +2,23 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# -from abc import ABC from dataclasses import dataclass +from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_token import AbstractHeaderAuthenticator + @dataclass -class DeclarativeAuthenticator(ABC): +class DeclarativeAuthenticator: """ Interface used to associate which authenticators can be used as part of the declarative framework """ + + +class NoAuth(AbstractHeaderAuthenticator, DeclarativeAuthenticator): + @property + def auth_header(self) -> str: + return "" + + @property + def token(self) -> str: + return "" diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py index 7bc9f6ca9374b..54b70fbe5cd97 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py @@ -145,8 +145,9 @@ def build(self, class_or_class_name: Union[str, Type], config, instantiate: bool else: # Because the component's data fields definitions use interfaces, we need to resolve the underlying types into the # concrete classes that implement the interface before generating the schema - DeclarativeComponentFactory._transform_interface_to_union(class_) - schema = class_.json_schema() + class_copy = copy.deepcopy(class_) + DeclarativeComponentFactory._transform_interface_to_union(class_copy) + schema = class_copy.json_schema() component_definition = { **updated_kwargs, @@ -270,12 +271,12 @@ def _is_builtin_type(cls) -> bool: return cls.__module__ == "builtins" @staticmethod - def _transform_interface_to_union(cls: type): - copy_cls = type(cls.__name__ + "Copy", cls.__bases__, dict(cls.__dict__)) - class_fields = fields(copy_cls) + def _transform_interface_to_union(expand_class: type): + class_fields = fields(expand_class) for field in class_fields: unpacked_field_types = DeclarativeComponentFactory.unpack(field.type) - copy_cls.__annotations__[field.name] = unpacked_field_types + 
expand_class.__annotations__[field.name] = unpacked_field_types + return expand_class @staticmethod def unpack(field_type: type): @@ -290,7 +291,7 @@ def unpack(field_type: type): # Functions as the base case since the origin is none for non-typing classes. If it is an interface then we derive # and return the union of its subclasses or return the original type if it is a concrete class or a primitive type module = field_type.__module__ - if module != "builtins" and module != "typing": + if "airbyte_cdk.sources.declarative" in module: subclasses = field_type.__subclasses__() if subclasses: return Union[tuple(subclasses)] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py index 14dadeab94149..2e18ce4cafaaf 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -7,8 +7,7 @@ from typing import Any, Mapping, MutableMapping, Optional, Union import requests -from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator -from airbyte_cdk.sources.declarative.auth.token import BasicHttpAuthenticator +from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator, NoAuth from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.requesters.error_handlers.default_error_handler import DefaultErrorHandler from airbyte_cdk.sources.declarative.requesters.error_handlers.error_handler import ErrorHandler @@ -56,7 +55,7 @@ def __post_init__(self, options: Mapping[str, Any]): self._request_options_provider = InterpolatedRequestOptionsProvider(config=self.config, **self.request_options_provider) else: self._request_options_provider = self.request_options_provider - self.authenticator = 
self.authenticator or BasicHttpAuthenticator("", config=self.config, options={}) + self.authenticator = self.authenticator or NoAuth() if type(self.http_method) == str: self.http_method = HttpMethod[self.http_method] self._method = self.http_method diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py index 8ebffb8bd4149..e1a187c51258e 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py @@ -21,6 +21,7 @@ @dataclass class ConcreteDeclarativeSource(JsonSchemaMixin): + version: str checker: CheckStream streams: List[DeclarativeStream] @@ -66,15 +67,19 @@ def _read_and_parse_yaml_file(self, path_to_yaml_file): return YamlParser().parse(config_content) def _validate_source(self): - full_config = { - "checker": self._source_config["check"], - "streams": [self._factory.create_component(stream_config, {}, False)() for stream_config in self._stream_configs()], - } + full_config = {} + if "version" in self._source_config: + full_config["version"] = self._source_config["version"] + if "check" in self._source_config: + full_config["checker"] = self._source_config["check"] + streams = [self._factory.create_component(stream_config, {}, False)() for stream_config in self._stream_configs()] + if len(streams) > 0: + full_config["streams"] = streams declarative_source_schema = ConcreteDeclarativeSource.json_schema() validate(full_config, declarative_source_schema) def _stream_configs(self): - stream_configs = self._source_config["streams"] + stream_configs = self._source_config.get("streams", []) for s in stream_configs: if "class_name" not in s: s["class_name"] = "airbyte_cdk.sources.declarative.declarative_stream.DeclarativeStream" diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py 
b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py index 595b6cccde225..c6fd689a9822a 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py @@ -156,7 +156,51 @@ def test_source_with_missing_streams_fails(self): stream_names: ["lists"] """ temporary_file = TestFileContent(content) - with pytest.raises(KeyError): + with pytest.raises(ValidationError): + YamlDeclarativeSource(temporary_file.filename) + + def test_source_with_missing_version_fails(self): + content = """ + definitions: + schema_loader: + name: "{{ options.stream_name }}" + file_path: "./source_sendgrid/schemas/{{ options.name }}.yaml" + retriever: + paginator: + type: "LimitPaginator" + page_size: 10 + limit_option: + inject_into: request_parameter + field_name: page_size + page_token_option: + inject_into: path + pagination_strategy: + type: "CursorPagination" + cursor_value: "{{ response._metadata.next }}" + requester: + path: "/v3/marketing/lists" + authenticator: + type: "BearerAuthenticator" + api_token: "{{ config.apikey }}" + request_parameters: + page_size: 10 + record_selector: + extractor: + field_pointer: ["result"] + streams: + - type: DeclarativeStream + $options: + name: "lists" + primary_key: id + url_base: "https://api.sendgrid.com" + schema_loader: "*ref(definitions.schema_loader)" + retriever: "*ref(definitions.retriever)" + check: + type: CheckStream + stream_names: ["lists"] + """ + temporary_file = TestFileContent(content) + with pytest.raises(ValidationError): YamlDeclarativeSource(temporary_file.filename) def test_source_with_invalid_stream_config_fails_validation(self): From f55828ff3c8d99233fb2ca16e69ffbb07328939a Mon Sep 17 00:00:00 2001 From: brianjlai Date: Mon, 15 Aug 2022 22:34:00 -0700 Subject: [PATCH 08/28] test for schema version --- .../sources/declarative/yaml_declarative_source.py | 8 +++----- 
.../sources/declarative/test_yaml_declarative_source.py | 4 +++- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py index fba68afe77332..f6674cd80d914 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py @@ -112,16 +112,14 @@ def expand_schema_interfaces(expand_class: type, visited: dict) -> type: visited[expand_class.__name__] = expand_class next_classes = [] - copy_cls = type(expand_class.__name__, expand_class.__bases__, dict(expand_class.__dict__)) - class_fields = fields(copy_cls) + class_fields = fields(expand_class) for field in class_fields: unpacked_field_types = DeclarativeComponentFactory.unpack(field.type) - copy_cls.__annotations__[field.name] = unpacked_field_types + expand_class.__annotations__[field.name] = unpacked_field_types next_classes.extend(YamlDeclarativeSource._get_next_expand_classes(field.type)) - for next_class in next_classes: YamlDeclarativeSource.expand_schema_interfaces(next_class, visited) - return copy_cls + return expand_class @staticmethod def _get_next_expand_classes(field_type) -> list[type]: diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py index 16196375a5979..b64ee73dca691 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py @@ -248,7 +248,9 @@ def test_generate_schema(): schema_str = YamlDeclarativeSource.generate_schema() schema = json.loads(schema_str) - assert schema["required"] == ["checker", "streams"] + assert "version" in schema["required"] + assert "checker" in 
schema["required"] + assert "streams" in schema["required"] assert schema["properties"]["checker"]["$ref"] == "#/definitions/CheckStream" assert schema["properties"]["streams"]["items"]["$ref"] == "#/definitions/DeclarativeStream" From 2d71d85717637d09c5f95377ce218254b28b7ab0 Mon Sep 17 00:00:00 2001 From: brianjlai Date: Wed, 17 Aug 2022 12:16:42 -0700 Subject: [PATCH 09/28] fix some types that were erroneously marked as invalid schema --- .../sources/declarative/auth/declarative_authenticator.py | 4 +++- .../airbyte_cdk/sources/declarative/declarative_stream.py | 2 +- .../paginators/strategies/cursor_pagination_strategy.py | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py index 73810ae32361d..4e01d00603357 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py @@ -5,6 +5,7 @@ from dataclasses import dataclass from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_token import AbstractHeaderAuthenticator +from dataclasses_jsonschema import JsonSchemaMixin @dataclass @@ -14,7 +15,8 @@ class DeclarativeAuthenticator: """ -class NoAuth(AbstractHeaderAuthenticator, DeclarativeAuthenticator): +@dataclass +class NoAuth(AbstractHeaderAuthenticator, DeclarativeAuthenticator, JsonSchemaMixin): @property def auth_header(self) -> str: return "" diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py index 7b87470091084..bb65aef3ee3c1 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py @@ -39,7 +39,7 @@ class 
DeclarativeStream(Stream, JsonSchemaMixin): _name: str = field(init=False, repr=False, default="") primary_key: Optional[Union[str, List[str], List[List[str]]]] _primary_key: str = field(init=False, repr=False, default="") - stream_cursor_field: Optional[List[str]] = None + stream_cursor_field: Optional[Union[List[str], str]] = None transformations: List[RecordTransformation] = None checkpoint_interval: Optional[int] = None diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py index 5940936ac6485..0f56074af0161 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py @@ -30,12 +30,14 @@ class CursorPaginationStrategy(PaginationStrategy, JsonSchemaMixin): cursor_value: Union[InterpolatedString, str] config: Config options: InitVar[Mapping[str, Any]] - stop_condition: Optional[InterpolatedBoolean] = None + stop_condition: Optional[Union[InterpolatedBoolean, str]] = None decoder: Decoder = JsonDecoder(options={}) def __post_init__(self, options: Mapping[str, Any]): if isinstance(self.cursor_value, str): self.cursor_value = InterpolatedString.create(self.cursor_value, options=options) + if isinstance(self.stop_condition, str): + self.stop_condition = InterpolatedBoolean(condition=self.stop_condition, options=options) def next_page_token(self, response: requests.Response, last_records: List[Mapping[str, Any]]) -> Optional[Any]: decoded_response = self.decoder.decode(response) From 65a7bd0b58d023c451ab808e9f6ad598858f1e07 Mon Sep 17 00:00:00 2001 From: brianjlai Date: Wed, 17 Aug 2022 15:15:40 -0700 Subject: [PATCH 10/28] some comments --- 
.../python/airbyte_cdk/sources/declarative/parsers/factory.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py index 54b70fbe5cd97..d29fce812393d 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py @@ -109,6 +109,7 @@ def create_component(self, component_definition: ComponentDefinition, config: Co This method will also traverse and instantiate its subcomponents if needed. :param component_definition: The definition of the object to create. :param config: Connector's config + :param instantiate: The factory should create the component when True or instead perform schema validation when False :return: The object to create """ kwargs = copy.deepcopy(component_definition) @@ -118,6 +119,9 @@ def create_component(self, component_definition: ComponentDefinition, config: Co class_name = CLASS_TYPES_REGISTRY[kwargs.pop("type")] else: raise ValueError(f"Failed to create component because it has no class_name or type. 
Definition: {component_definition}") + + # Because configs are sometimes stored on a component's parent definition, we should remove it and rely on the config + # that is passed down through the factory instead kwargs.pop("config", None) return self.build( class_name, From 561a285714145a4cde81d0a528289870a3fa250d Mon Sep 17 00:00:00 2001 From: brianjlai Date: Wed, 17 Aug 2022 20:25:39 -0700 Subject: [PATCH 11/28] add jsonschemamixin to interfaces --- .../sources/declarative/auth/declarative_authenticator.py | 2 +- .../airbyte_cdk/sources/declarative/decoders/decoder.py | 5 +++-- .../sources/declarative/extractors/http_selector.py | 5 +++-- .../sources/declarative/extractors/record_extractor.py | 5 +++-- .../airbyte_cdk/sources/declarative/parsers/factory.py | 5 +++-- .../requesters/error_handlers/backoff_strategy.py | 3 ++- .../declarative/requesters/error_handlers/error_handler.py | 5 +++-- .../sources/declarative/requesters/paginators/paginator.py | 3 ++- .../requesters/request_options/request_options_provider.py | 5 +++-- .../airbyte_cdk/sources/declarative/requesters/requester.py | 3 ++- .../airbyte_cdk/sources/declarative/retrievers/retriever.py | 5 +++-- .../airbyte_cdk/sources/declarative/schema/schema_loader.py | 6 ++++-- .../sources/declarative/stream_slicers/stream_slicer.py | 3 ++- .../sources/declarative/transformations/transformation.py | 5 +++-- 14 files changed, 37 insertions(+), 23 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py index 4e01d00603357..bad2665169c75 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py @@ -9,7 +9,7 @@ @dataclass -class DeclarativeAuthenticator: +class DeclarativeAuthenticator(JsonSchemaMixin): """ Interface used to associate which
authenticators can be used as part of the declarative framework """ diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/decoder.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/decoder.py index 5ec36516f4fd2..04b13b4ceb415 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/decoder.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/decoder.py @@ -2,15 +2,16 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # -from abc import ABC, abstractmethod +from abc import abstractmethod from dataclasses import dataclass from typing import Any, List, Mapping, Union import requests +from dataclasses_jsonschema import JsonSchemaMixin @dataclass -class Decoder(ABC): +class Decoder(JsonSchemaMixin): """ Decoder strategy to transform a requests.Response into a Mapping[str, Any] """ diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_selector.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_selector.py index 517f61c70b799..97b4006933024 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_selector.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_selector.py @@ -2,16 +2,17 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # -from abc import ABC, abstractmethod +from abc import abstractmethod from dataclasses import dataclass from typing import Any, List, Mapping, Optional import requests from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState +from dataclasses_jsonschema import JsonSchemaMixin @dataclass -class HttpSelector(ABC): +class HttpSelector(JsonSchemaMixin): """ Responsible for translating an HTTP response into a list of records by extracting records from the response and optionally filtering records based on a heuristic. 
diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_extractor.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_extractor.py index 5e2b865156eb2..2f4b84769f293 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_extractor.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_extractor.py @@ -2,16 +2,17 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # -from abc import ABC, abstractmethod +from abc import abstractmethod from dataclasses import dataclass from typing import List import requests from airbyte_cdk.sources.declarative.types import Record +from dataclasses_jsonschema import JsonSchemaMixin @dataclass -class RecordExtractor(ABC): +class RecordExtractor(JsonSchemaMixin): """ Responsible for translating an HTTP response into a list of records by extracting records from the response. """ diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py index d29fce812393d..77487aa4f9f52 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/factory.py @@ -7,6 +7,7 @@ import copy import enum import importlib +import inspect import typing from dataclasses import fields from typing import Any, List, Literal, Mapping, Type, Union, get_args, get_origin, get_type_hints @@ -16,6 +17,7 @@ from airbyte_cdk.sources.declarative.parsers.class_types_registry import CLASS_TYPES_REGISTRY from airbyte_cdk.sources.declarative.parsers.default_implementation_registry import DEFAULT_IMPLEMENTATIONS_REGISTRY from airbyte_cdk.sources.declarative.types import Config +from dataclasses_jsonschema import JsonSchemaMixin from jsonschema.validators import validate ComponentDefinition: Union[Literal, Mapping, List] @@ -294,8 +296,7 @@ def unpack(field_type: type): if generic_type is None: # 
Functions as the base case since the origin is none for non-typing classes. If it is an interface then we derive # and return the union of its subclasses or return the original type if it is a concrete class or a primitive type - module = field_type.__module__ - if "airbyte_cdk.sources.declarative" in module: + if inspect.isclass(field_type) and issubclass(field_type, JsonSchemaMixin): subclasses = field_type.__subclasses__() if subclasses: return Union[tuple(subclasses)] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py index 00c1b6dff23b6..1bde396f17c1f 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py @@ -7,10 +7,11 @@ from typing import Optional import requests +from dataclasses_jsonschema import JsonSchemaMixin @dataclass -class BackoffStrategy: +class BackoffStrategy(JsonSchemaMixin): """ Backoff strategy defining how long to wait before retrying a request that resulted in an error. """ diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py index 50b6412ad350e..ef72fe9145ac3 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py @@ -2,16 +2,17 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# -from abc import ABC, abstractmethod +from abc import abstractmethod from dataclasses import dataclass from typing import Union import requests from airbyte_cdk.sources.declarative.requesters.error_handlers.response_status import ResponseStatus +from dataclasses_jsonschema import JsonSchemaMixin @dataclass -class ErrorHandler(ABC): +class ErrorHandler(JsonSchemaMixin): """ Defines whether a request was successful and how to handle a failure. """ diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py index 68b18307e0883..bd96fc27b153e 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py @@ -8,10 +8,11 @@ import requests from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import RequestOptionsProvider +from dataclasses_jsonschema import JsonSchemaMixin @dataclass -class Paginator(RequestOptionsProvider): +class Paginator(RequestOptionsProvider, JsonSchemaMixin): """ Defines the token to use to fetch the next page of records from the API. diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py index 1be5fa690349f..d17f893dedbe8 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py @@ -2,15 +2,16 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# -from abc import ABC, abstractmethod +from abc import abstractmethod from dataclasses import dataclass from typing import Any, Mapping, MutableMapping, Optional, Union from airbyte_cdk.sources.declarative.types import StreamSlice, StreamState +from dataclasses_jsonschema import JsonSchemaMixin @dataclass -class RequestOptionsProvider(ABC): +class RequestOptionsProvider(JsonSchemaMixin): """ Defines the request options to set on an outgoing HTTP request diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py index 24c4211df5ed8..de56a6aef8f23 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py @@ -10,6 +10,7 @@ from airbyte_cdk.sources.declarative.requesters.error_handlers.response_status import ResponseStatus from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import RequestOptionsProvider from airbyte_cdk.sources.declarative.types import StreamSlice, StreamState +from dataclasses_jsonschema import JsonSchemaMixin from requests.auth import AuthBase @@ -22,7 +23,7 @@ class HttpMethod(Enum): POST = "POST" -class Requester(RequestOptionsProvider): +class Requester(RequestOptionsProvider, JsonSchemaMixin): @abstractmethod def get_authenticator(self) -> AuthBase: """ diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py index a9ae02806425a..45252050a6ec1 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py @@ -2,16 +2,17 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# -from abc import ABC, abstractmethod +from abc import abstractmethod from dataclasses import dataclass from typing import Iterable, List, Optional from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState +from dataclasses_jsonschema import JsonSchemaMixin @dataclass -class Retriever(ABC): +class Retriever(JsonSchemaMixin): """ Responsible for fetching a stream's records from an HTTP API source. """ diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/schema/schema_loader.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/schema/schema_loader.py index 3a0d45316a4e3..822b4f3c5f25c 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/schema/schema_loader.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/schema/schema_loader.py @@ -2,13 +2,15 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # -from abc import ABC, abstractmethod +from abc import abstractmethod from dataclasses import dataclass from typing import Any, Mapping +from dataclasses_jsonschema import JsonSchemaMixin + @dataclass -class SchemaLoader(ABC): +class SchemaLoader(JsonSchemaMixin): """Describes a stream's schema""" @abstractmethod diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py index 4ff22ce12c611..6c66895c3af82 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py @@ -9,10 +9,11 @@ from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import RequestOptionsProvider from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState +from dataclasses_jsonschema import JsonSchemaMixin @dataclass -class StreamSlicer(RequestOptionsProvider): 
+class StreamSlicer(RequestOptionsProvider, JsonSchemaMixin): """ Slices the stream into a subset of records. Slices enable state checkpointing and data retrieval parallelization. diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/transformation.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/transformation.py index 1b2c429687d0a..2a76621c43fd2 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/transformation.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/transformation.py @@ -2,15 +2,16 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # -from abc import ABC, abstractmethod +from abc import abstractmethod from dataclasses import dataclass from typing import Optional from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState +from dataclasses_jsonschema import JsonSchemaMixin @dataclass -class RecordTransformation(ABC): +class RecordTransformation(JsonSchemaMixin): """ Implementations of this class define transformations that can be applied to records of a stream. """ From 019165d6f57881297618e92ac6a3d2ad6b91b797 Mon Sep 17 00:00:00 2001 From: brianjlai Date: Thu, 18 Aug 2022 00:37:21 -0700 Subject: [PATCH 12/28] update tests now that interfaces are jsonschemamixin --- .../declarative/yaml_declarative_source.py | 8 +- .../test_yaml_declarative_source.py | 113 ++++++++++-------- 2 files changed, 67 insertions(+), 54 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py index f6674cd80d914..0019ec5975472 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py @@ -2,6 +2,7 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# +import inspect import json import logging import typing @@ -132,10 +133,9 @@ def _get_next_expand_classes(field_type) -> list[type]: """ generic_type = typing.get_origin(field_type) if generic_type is None: - module = field_type.__module__ - # We can only continue parsing declarative components since we explicitly inherit from the JsonSchemaMixin class which is - # used to generate the final json schema - if "airbyte_cdk.sources.declarative" in module and not isinstance(field_type, EnumMeta): + # We can only continue parsing declarative that inherit from the JsonSchemaMixin class because it is used + # to generate the final json schema + if inspect.isclass(field_type) and issubclass(field_type, JsonSchemaMixin) and not isinstance(field_type, EnumMeta): subclasses = field_type.__subclasses__() if subclasses: return subclasses diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py index b64ee73dca691..3740fbac3e060 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py @@ -1,6 +1,7 @@ # # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# + import json import os import tempfile @@ -255,39 +256,37 @@ def test_generate_schema(): assert schema["properties"]["streams"]["items"]["$ref"] == "#/definitions/DeclarativeStream" check_stream = schema["definitions"]["CheckStream"] - assert check_stream["required"] == ["stream_names"] + assert {"stream_names"}.issubset(check_stream["required"]) assert check_stream["properties"]["stream_names"]["type"] == "array" assert check_stream["properties"]["stream_names"]["items"]["type"] == "string" declarative_stream = schema["definitions"]["DeclarativeStream"] - assert declarative_stream["required"] == ["schema_loader", "retriever", "config"] + assert {"schema_loader", "retriever", "config"}.issubset(declarative_stream["required"]) assert declarative_stream["properties"]["schema_loader"]["$ref"] == "#/definitions/JsonSchema" assert declarative_stream["properties"]["retriever"]["$ref"] == "#/definitions/SimpleRetriever" assert declarative_stream["properties"]["name"]["type"] == "string" - assert declarative_stream["properties"]["primary_key"]["anyOf"][0]["type"] == "array" - assert declarative_stream["properties"]["primary_key"]["anyOf"][0]["items"]["type"] == "string" - assert declarative_stream["properties"]["primary_key"]["anyOf"][1]["type"] == "array" - assert declarative_stream["properties"]["primary_key"]["anyOf"][1]["items"]["type"] == "array" - assert declarative_stream["properties"]["primary_key"]["anyOf"][1]["items"]["items"]["type"] == "string" - assert declarative_stream["properties"]["primary_key"]["anyOf"][2]["type"] == "string" - assert declarative_stream["properties"]["stream_cursor_field"]["type"] == "array" - assert declarative_stream["properties"]["stream_cursor_field"]["items"]["type"] == "string" + assert {"type": "array", "items": {"type": "string"}} in declarative_stream["properties"]["primary_key"]["anyOf"] + assert {"type": "array", "items": {"type": "array", "items": {"type": "string"}}} in declarative_stream["properties"]["primary_key"][ + 
"anyOf" + ] + assert {"type": "string"} in declarative_stream["properties"]["primary_key"]["anyOf"] + assert {"type": "array", "items": {"type": "string"}} in declarative_stream["properties"]["stream_cursor_field"]["anyOf"] + assert {"type": "string"} in declarative_stream["properties"]["stream_cursor_field"]["anyOf"] assert declarative_stream["properties"]["transformations"]["type"] == "array" assert {"$ref": "#/definitions/AddFields"} in declarative_stream["properties"]["transformations"]["items"]["anyOf"] assert {"$ref": "#/definitions/RemoveFields"} in declarative_stream["properties"]["transformations"]["items"]["anyOf"] assert declarative_stream["properties"]["checkpoint_interval"]["type"] == "integer" - simple_retriever = schema["definitions"]["SimpleRetriever"] - assert simple_retriever["required"] == ["requester", "record_selector"] + simple_retriever = schema["definitions"]["SimpleRetriever"]["allOf"][1] + assert {"requester", "record_selector"}.issubset(simple_retriever["required"]) assert simple_retriever["properties"]["requester"]["$ref"] == "#/definitions/HttpRequester" assert simple_retriever["properties"]["record_selector"]["$ref"] == "#/definitions/RecordSelector" assert simple_retriever["properties"]["name"]["type"] == "string" - assert simple_retriever["properties"]["primary_key"]["anyOf"][0]["type"] == "array" - assert simple_retriever["properties"]["primary_key"]["anyOf"][0]["items"]["type"] == "string" - assert simple_retriever["properties"]["primary_key"]["anyOf"][1]["type"] == "array" - assert simple_retriever["properties"]["primary_key"]["anyOf"][1]["items"]["type"] == "array" - assert simple_retriever["properties"]["primary_key"]["anyOf"][1]["items"]["items"]["type"] == "string" - assert simple_retriever["properties"]["primary_key"]["anyOf"][2]["type"] == "string" + assert {"type": "array", "items": {"type": "string"}} in declarative_stream["properties"]["primary_key"]["anyOf"] + assert {"type": "array", "items": {"type": "array", "items": 
{"type": "string"}}} in declarative_stream["properties"]["primary_key"][ + "anyOf" + ] + assert {"type": "string"} in declarative_stream["properties"]["primary_key"]["anyOf"] assert {"$ref": "#/definitions/LimitPaginator"} in simple_retriever["properties"]["paginator"]["anyOf"] assert {"$ref": "#/definitions/NoPagination"} in simple_retriever["properties"]["paginator"]["anyOf"] assert {"$ref": "#/definitions/CartesianProductStreamSlicer"} in simple_retriever["properties"]["stream_slicer"]["anyOf"] @@ -296,17 +295,15 @@ def test_generate_schema(): assert {"$ref": "#/definitions/SingleSlice"} in simple_retriever["properties"]["stream_slicer"]["anyOf"] assert {"$ref": "#/definitions/SubstreamSlicer"} in simple_retriever["properties"]["stream_slicer"]["anyOf"] - http_requester = schema["definitions"]["HttpRequester"] - assert http_requester["required"] == ["name", "url_base", "path", "config"] + http_requester = schema["definitions"]["HttpRequester"]["allOf"][1] + assert {"name", "url_base", "path", "config"}.issubset(http_requester["required"]) assert http_requester["properties"]["name"]["type"] == "string" assert {"$ref": "#/definitions/InterpolatedString"} in http_requester["properties"]["url_base"]["anyOf"] assert {"type": "string"} in http_requester["properties"]["path"]["anyOf"] assert {"$ref": "#/definitions/InterpolatedString"} in http_requester["properties"]["url_base"]["anyOf"] assert {"type": "string"} in http_requester["properties"]["path"]["anyOf"] - assert http_requester["properties"]["http_method"]["anyOf"][0]["type"] == "string" - assert http_requester["properties"]["http_method"]["anyOf"][1]["type"] == "string" - assert "GET" in http_requester["properties"]["http_method"]["anyOf"][1]["enum"] - assert "POST" in http_requester["properties"]["http_method"]["anyOf"][1]["enum"] + assert {"type": "string"} in http_requester["properties"]["http_method"]["anyOf"] + assert {"type": "string", "enum": ["GET", "POST"]} in 
http_requester["properties"]["http_method"]["anyOf"] assert http_requester["properties"]["request_options_provider"]["$ref"] == "#/definitions/InterpolatedRequestOptionsProvider" assert {"$ref": "#/definitions/DeclarativeOauth2Authenticator"} in http_requester["properties"]["authenticator"]["anyOf"] assert {"$ref": "#/definitions/ApiKeyAuthenticator"} in http_requester["properties"]["authenticator"]["anyOf"] @@ -315,34 +312,23 @@ def test_generate_schema(): assert {"$ref": "#/definitions/CompositeErrorHandler"} in http_requester["properties"]["error_handler"]["anyOf"] assert {"$ref": "#/definitions/DefaultErrorHandler"} in http_requester["properties"]["error_handler"]["anyOf"] - api_key_authenticator = schema["definitions"]["ApiKeyAuthenticator"] - assert api_key_authenticator["required"] == ["header", "api_token", "config"] + api_key_authenticator = schema["definitions"]["ApiKeyAuthenticator"]["allOf"][1] + assert {"header", "api_token", "config"}.issubset(api_key_authenticator["required"]) assert {"$ref": "#/definitions/InterpolatedString"} in api_key_authenticator["properties"]["header"]["anyOf"] assert {"type": "string"} in api_key_authenticator["properties"]["header"]["anyOf"] assert {"$ref": "#/definitions/InterpolatedString"} in api_key_authenticator["properties"]["api_token"]["anyOf"] assert {"type": "string"} in api_key_authenticator["properties"]["api_token"]["anyOf"] - default_error_handler = schema["definitions"]["DefaultErrorHandler"] + default_error_handler = schema["definitions"]["DefaultErrorHandler"]["allOf"][1] assert default_error_handler["properties"]["response_filters"]["type"] == "array" assert default_error_handler["properties"]["response_filters"]["items"]["$ref"] == "#/definitions/HttpResponseFilter" assert default_error_handler["properties"]["max_retries"]["type"] == "integer" assert default_error_handler["properties"]["backoff_strategies"]["type"] == "array" - assert {"$ref": "#/definitions/ConstantBackoffStrategy"} in 
default_error_handler["properties"]["backoff_strategies"]["items"]["anyOf"] - assert {"$ref": "#/definitions/ExponentialBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"]["items"][ - "anyOf" - ] - assert {"$ref": "#/definitions/WaitTimeFromHeaderBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"][ - "items" - ]["anyOf"] - assert {"$ref": "#/definitions/WaitUntilTimeFromHeaderBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"][ - "items" - ]["anyOf"] - - exponential_backoff_strategy = schema["definitions"]["ExponentialBackoffStrategy"] - assert exponential_backoff_strategy["properties"]["factor"]["type"] == "number" - limit_paginator = schema["definitions"]["LimitPaginator"] - assert limit_paginator["required"] == ["page_size", "limit_option", "page_token_option", "pagination_strategy", "config", "url_base"] + limit_paginator = schema["definitions"]["LimitPaginator"]["allOf"][1] + assert {"page_size", "limit_option", "page_token_option", "pagination_strategy", "config", "url_base"}.issubset( + limit_paginator["required"] + ) assert limit_paginator["properties"]["page_size"]["type"] == "integer" assert limit_paginator["properties"]["limit_option"]["$ref"] == "#/definitions/RequestOption" assert limit_paginator["properties"]["page_token_option"]["$ref"] == "#/definitions/RequestOption" @@ -353,17 +339,44 @@ def test_generate_schema(): assert {"$ref": "#/definitions/InterpolatedString"} in http_requester["properties"]["url_base"]["anyOf"] assert {"type": "string"} in http_requester["properties"]["path"]["anyOf"] - cursor_pagination_stategy = schema["definitions"]["CursorPaginationStrategy"] - assert cursor_pagination_stategy["required"] == ["cursor_value", "config"] - assert {"$ref": "#/definitions/InterpolatedString"} in cursor_pagination_stategy["properties"]["cursor_value"]["anyOf"] - assert {"type": "string"} in cursor_pagination_stategy["properties"]["cursor_value"]["anyOf"] - assert 
cursor_pagination_stategy["properties"]["stop_condition"]["$ref"] == "#/definitions/InterpolatedBoolean" - assert cursor_pagination_stategy["properties"]["decoder"]["$ref"] == "#/definitions/JsonDecoder" + cursor_pagination_strategy = schema["definitions"]["CursorPaginationStrategy"]["allOf"][1] + assert {"cursor_value", "config"}.issubset(cursor_pagination_strategy["required"]) + assert {"$ref": "#/definitions/InterpolatedString"} in cursor_pagination_strategy["properties"]["cursor_value"]["anyOf"] + assert {"type": "string"} in cursor_pagination_strategy["properties"]["cursor_value"]["anyOf"] + assert {"$ref": "#/definitions/InterpolatedBoolean"} in cursor_pagination_strategy["properties"]["stop_condition"]["anyOf"] + assert {"type": "string"} in cursor_pagination_strategy["properties"]["stop_condition"]["anyOf"] + assert cursor_pagination_strategy["properties"]["decoder"]["$ref"] == "#/definitions/JsonDecoder" - list_stream_slicer = schema["definitions"]["ListStreamSlicer"] - assert list_stream_slicer["required"] == ["slice_values", "cursor_field", "config"] + list_stream_slicer = schema["definitions"]["ListStreamSlicer"]["allOf"][1] + assert {"slice_values", "cursor_field", "config"}.issubset(list_stream_slicer["required"]) assert {"type": "array", "items": {"type": "string"}} in list_stream_slicer["properties"]["slice_values"]["anyOf"] assert {"type": "string"} in list_stream_slicer["properties"]["slice_values"]["anyOf"] assert {"$ref": "#/definitions/InterpolatedString"} in list_stream_slicer["properties"]["cursor_field"]["anyOf"] assert {"type": "string"} in list_stream_slicer["properties"]["cursor_field"]["anyOf"] assert list_stream_slicer["properties"]["request_option"]["$ref"] == "#/definitions/RequestOption" + + added_field_definition = schema["definitions"]["AddedFieldDefinition"] + assert {"path", "value"}.issubset(added_field_definition["required"]) + assert added_field_definition["properties"]["path"]["type"] == "array" + assert 
added_field_definition["properties"]["path"]["items"]["type"] == "string" + assert {"$ref": "#/definitions/InterpolatedString"} in added_field_definition["properties"]["value"]["anyOf"] + assert {"type": "string"} in added_field_definition["properties"]["value"]["anyOf"] + + # There is something very strange about JsonSchemaMixin.json_schema(). For some reason, when this test is called independently + # it will pass. However, when it is invoked with the entire test, certain components won't get generated in the schema. Since + # the generate_schema() method is invoked by itself, this doesn't happen normally, but only in tests that are all called together + # One way to replicate this is add DefaultErrorHandler.json_schema() to the start of this test and uncomment the assertions below + + # assert {"$ref": "#/definitions/ConstantBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"]["items"]["anyOf"] + # assert {"$ref": "#/definitions/ExponentialBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"]["items"][ + # "anyOf" + # ] + # assert {"$ref": "#/definitions/WaitTimeFromHeaderBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"][ + # "items" + # ]["anyOf"] + # assert {"$ref": "#/definitions/WaitUntilTimeFromHeaderBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"][ + # "items" + # ]["anyOf"] + # + # exponential_backoff_strategy = schema["definitions"]["ExponentialBackoffStrategy"]["allOf"][1] + # assert exponential_backoff_strategy["properties"]["factor"]["type"] == "number" From e378a7f6182007b7db00f57cd125e1282276179a Mon Sep 17 00:00:00 2001 From: brianjlai Date: Thu, 18 Aug 2022 00:40:11 -0700 Subject: [PATCH 13/28] accidentally removed a mixin --- .../requesters/paginators/strategies/pagination_strategy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py index 29cacb726483e..a2d9407a833dc 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py @@ -7,10 +7,11 @@ from typing import Any, List, Mapping, Optional import requests +from dataclasses_jsonschema import JsonSchemaMixin @dataclass -class PaginationStrategy: +class PaginationStrategy(JsonSchemaMixin): """ Defines how to get the next page token """ From 7eac17ef699e74917306b5950290efbc7e24d5c6 Mon Sep 17 00:00:00 2001 From: brianjlai Date: Thu, 18 Aug 2022 00:41:40 -0700 Subject: [PATCH 14/28] remove unneeded test --- .../python/unit_tests/sources/declarative/test_factory.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py index 61d4406494803..f6079fdaada48 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py @@ -39,7 +39,6 @@ from airbyte_cdk.sources.declarative.stream_slicers.list_stream_slicer import ListStreamSlicer from airbyte_cdk.sources.declarative.transformations import AddFields, RemoveFields from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition -from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource from jsonschema import ValidationError factory = DeclarativeComponentFactory() @@ -808,8 +807,3 @@ def test_validate_types_nested_in_list(): def test_unpack(test_name, input_type, expected_unpacked_types): actual_unpacked_types = 
DeclarativeComponentFactory.unpack(input_type) assert actual_unpacked_types == expected_unpacked_types - - -def test_complete_schema(): - schema = YamlDeclarativeSource.generate_schema() - print(schema) From 33f98e1bd2752fe16bdf082c36aef9ad6549bca0 Mon Sep 17 00:00:00 2001 From: brianjlai Date: Thu, 18 Aug 2022 00:49:12 -0700 Subject: [PATCH 15/28] make comment a little more clear --- .../sources/declarative/test_yaml_declarative_source.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py index 3740fbac3e060..94a94d888b42d 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py @@ -363,9 +363,10 @@ def test_generate_schema(): assert {"type": "string"} in added_field_definition["properties"]["value"]["anyOf"] # There is something very strange about JsonSchemaMixin.json_schema(). For some reason, when this test is called independently - # it will pass. However, when it is invoked with the entire test, certain components won't get generated in the schema. Since - # the generate_schema() method is invoked by itself, this doesn't happen normally, but only in tests that are all called together - # One way to replicate this is add DefaultErrorHandler.json_schema() to the start of this test and uncomment the assertions below + # it will pass. However, when it is invoked with the entire test file, certain components won't get generated in the schema. Since + # the generate_schema() method is invoked by independently so this doesn't happen under normal circumstance when we generate the + # complete schema. It only happens when the tests are all called together. 
+ # One way to replicate this is to add DefaultErrorHandler.json_schema() to the start of this test and uncomment the assertions below # assert {"$ref": "#/definitions/ConstantBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"]["items"]["anyOf"] # assert {"$ref": "#/definitions/ExponentialBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"]["items"][ From 6ef9e101cf93cd720c4e8b22bb2e5dea6fd12db7 Mon Sep 17 00:00:00 2001 From: brianjlai Date: Thu, 18 Aug 2022 10:55:42 -0700 Subject: [PATCH 16/28] update changelog --- airbyte-cdk/python/CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/airbyte-cdk/python/CHANGELOG.md b/airbyte-cdk/python/CHANGELOG.md index 5b33f00fb1ed9..f40eec92f1af7 100644 --- a/airbyte-cdk/python/CHANGELOG.md +++ b/airbyte-cdk/python/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 0.1.77 +- Add schema validation for declarative YAML connector configs + ## 0.1.76 - Bugfix: Correctly set parent slice stream for sub-resource streams From 3612789d9b5b77b6bb517b04d5258e07ee10ea29 Mon Sep 17 00:00:00 2001 From: brianjlai Date: Thu, 18 Aug 2022 12:01:46 -0700 Subject: [PATCH 17/28] bump version --- airbyte-cdk/python/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-cdk/python/setup.py b/airbyte-cdk/python/setup.py index 1c81b9d7441e0..f88512d9bdb59 100644 --- a/airbyte-cdk/python/setup.py +++ b/airbyte-cdk/python/setup.py @@ -15,7 +15,7 @@ setup( name="airbyte-cdk", - version="0.1.76", + version="0.1.77", description="A framework for writing Airbyte Connectors.", long_description=README, long_description_content_type="text/markdown", From 74e0e515e398015ede2c0b7278f477249dc2dfa7 Mon Sep 17 00:00:00 2001 From: brianjlai Date: Thu, 18 Aug 2022 12:33:08 -0700 Subject: [PATCH 18/28] generic enum not enum class --- .../sources/declarative/yaml_declarative_source.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git 
a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py index 0019ec5975472..10a65f4945e16 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py @@ -7,7 +7,7 @@ import logging import typing from dataclasses import dataclass, fields -from enum import EnumMeta +from enum import Enum, EnumMeta from typing import Any, List, Mapping, Union from airbyte_cdk.sources.declarative.checks import CheckStream @@ -17,7 +17,6 @@ from airbyte_cdk.sources.declarative.exceptions import InvalidConnectorDefinitionException from airbyte_cdk.sources.declarative.parsers.factory import DeclarativeComponentFactory from airbyte_cdk.sources.declarative.parsers.yaml_parser import YamlParser -from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod from airbyte_cdk.sources.streams.core import Stream from dataclasses_jsonschema import JsonSchemaMixin from jsonschema.validators import validate @@ -151,8 +150,6 @@ def _get_next_expand_classes(field_type) -> list[type]: class SchemaEncoder(json.JSONEncoder): def default(self, obj): - if isinstance(obj, property): - return str(obj) - elif isinstance(obj, HttpMethod): + if isinstance(obj, property) or isinstance(obj, Enum): return str(obj) return json.JSONEncoder.default(self, obj) From 4cdccb46abf55893bc7283276cb5192ce2f4b132 Mon Sep 17 00:00:00 2001 From: brianjlai Date: Sun, 14 Aug 2022 15:50:53 -0700 Subject: [PATCH 19/28] Add method to generate the complete JSON schema of the low code declarative language --- .../declarative/yaml_declarative_source.py | 78 ++++++++++++++++++- .../sources/declarative/test_factory.py | 6 ++ 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py 
b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py index e1a187c51258e..fba68afe77332 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py @@ -4,8 +4,10 @@ import json import logging -from dataclasses import dataclass -from typing import Any, List, Mapping +import typing +from dataclasses import dataclass, fields +from enum import EnumMeta +from typing import Any, List, Mapping, Union from airbyte_cdk.sources.declarative.checks import CheckStream from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker @@ -14,6 +16,7 @@ from airbyte_cdk.sources.declarative.exceptions import InvalidConnectorDefinitionException from airbyte_cdk.sources.declarative.parsers.factory import DeclarativeComponentFactory from airbyte_cdk.sources.declarative.parsers.yaml_parser import YamlParser +from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod from airbyte_cdk.sources.streams.core import Stream from dataclasses_jsonschema import JsonSchemaMixin from jsonschema.validators import validate @@ -84,3 +87,74 @@ def _stream_configs(self): if "class_name" not in s: s["class_name"] = "airbyte_cdk.sources.declarative.declarative_stream.DeclarativeStream" return stream_configs + + @staticmethod + def generate_schema() -> str: + expanded_source_definition = YamlDeclarativeSource.expand_schema_interfaces(ConcreteDeclarativeSource, {}) + expanded_schema = expanded_source_definition.json_schema() + return json.dumps(expanded_schema, cls=SchemaEncoder) + + @staticmethod + def expand_schema_interfaces(expand_class: type, visited: dict) -> type: + """ + Recursive function that takes in class type that will have its interface fields unpacked and expended and then recursively + attempt the same expansion on all the class' underlying fields that are declarative component. 
It also performs expansion + with respect to interfaces that are contained within generic data types. + :param expand_class: The declarative component class that will have its interface fields expanded + :param visited: cache used to store a record of already visited declarative classes that have already been seen + :return: The expanded declarative component + """ + + # Recursive base case to stop recursion if we have already expanded an interface in case of cyclical components + # like CompositeErrorHandler + if expand_class.__name__ in visited: + return visited[expand_class.__name__] + visited[expand_class.__name__] = expand_class + + next_classes = [] + copy_cls = type(expand_class.__name__, expand_class.__bases__, dict(expand_class.__dict__)) + class_fields = fields(copy_cls) + for field in class_fields: + unpacked_field_types = DeclarativeComponentFactory.unpack(field.type) + copy_cls.__annotations__[field.name] = unpacked_field_types + next_classes.extend(YamlDeclarativeSource._get_next_expand_classes(field.type)) + + for next_class in next_classes: + YamlDeclarativeSource.expand_schema_interfaces(next_class, visited) + return copy_cls + + @staticmethod + def _get_next_expand_classes(field_type) -> list[type]: + """ + Parses through a given field type and assembles a list of all underlying declarative components. For a concrete declarative class + it will return itself. For a declarative interface it will return its subclasses. For declarative components in a generic type + it will return the unpacked classes. Any non-declarative types will be skipped. 
+ :param field_type: A field type that + :return: + """ + generic_type = typing.get_origin(field_type) + if generic_type is None: + module = field_type.__module__ + # We can only continue parsing declarative components since we explicitly inherit from the JsonSchemaMixin class which is + # used to generate the final json schema + if "airbyte_cdk.sources.declarative" in module and not isinstance(field_type, EnumMeta): + subclasses = field_type.__subclasses__() + if subclasses: + return subclasses + else: + return [field_type] + elif generic_type == list or generic_type == Union: + next_classes = [] + for underlying_type in typing.get_args(field_type): + next_classes.extend(YamlDeclarativeSource._get_next_expand_classes(underlying_type)) + return next_classes + return [] + + +class SchemaEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, property): + return str(obj) + elif isinstance(obj, HttpMethod): + return str(obj) + return json.JSONEncoder.default(self, obj) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py index f6079fdaada48..61d4406494803 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py @@ -39,6 +39,7 @@ from airbyte_cdk.sources.declarative.stream_slicers.list_stream_slicer import ListStreamSlicer from airbyte_cdk.sources.declarative.transformations import AddFields, RemoveFields from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition +from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource from jsonschema import ValidationError factory = DeclarativeComponentFactory() @@ -807,3 +808,8 @@ def test_validate_types_nested_in_list(): def test_unpack(test_name, input_type, expected_unpacked_types): actual_unpacked_types = DeclarativeComponentFactory.unpack(input_type) assert 
actual_unpacked_types == expected_unpacked_types + + +def test_complete_schema(): + schema = YamlDeclarativeSource.generate_schema() + print(schema) From c60ce3a828cfaf528978abab4642a883b3b01caa Mon Sep 17 00:00:00 2001 From: brianjlai Date: Mon, 15 Aug 2022 00:55:07 -0700 Subject: [PATCH 20/28] add testing of a few components during schema gen --- .../strategies/pagination_strategy.py | 3 +- .../test_yaml_declarative_source.py | 125 +++++++++++++++++- 2 files changed, 125 insertions(+), 3 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py index a2d9407a833dc..29cacb726483e 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py @@ -7,11 +7,10 @@ from typing import Any, List, Mapping, Optional import requests -from dataclasses_jsonschema import JsonSchemaMixin @dataclass -class PaginationStrategy(JsonSchemaMixin): +class PaginationStrategy: """ Defines how to get the next page token """ diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py index c6fd689a9822a..16196375a5979 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py @@ -1,7 +1,7 @@ # # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# - +import json import os import tempfile import unittest @@ -242,3 +242,126 @@ def __enter__(self): def __exit__(self, type, value, traceback): os.unlink(self.filename) + + +def test_generate_schema(): + schema_str = YamlDeclarativeSource.generate_schema() + schema = json.loads(schema_str) + + assert schema["required"] == ["checker", "streams"] + assert schema["properties"]["checker"]["$ref"] == "#/definitions/CheckStream" + assert schema["properties"]["streams"]["items"]["$ref"] == "#/definitions/DeclarativeStream" + + check_stream = schema["definitions"]["CheckStream"] + assert check_stream["required"] == ["stream_names"] + assert check_stream["properties"]["stream_names"]["type"] == "array" + assert check_stream["properties"]["stream_names"]["items"]["type"] == "string" + + declarative_stream = schema["definitions"]["DeclarativeStream"] + assert declarative_stream["required"] == ["schema_loader", "retriever", "config"] + assert declarative_stream["properties"]["schema_loader"]["$ref"] == "#/definitions/JsonSchema" + assert declarative_stream["properties"]["retriever"]["$ref"] == "#/definitions/SimpleRetriever" + assert declarative_stream["properties"]["name"]["type"] == "string" + assert declarative_stream["properties"]["primary_key"]["anyOf"][0]["type"] == "array" + assert declarative_stream["properties"]["primary_key"]["anyOf"][0]["items"]["type"] == "string" + assert declarative_stream["properties"]["primary_key"]["anyOf"][1]["type"] == "array" + assert declarative_stream["properties"]["primary_key"]["anyOf"][1]["items"]["type"] == "array" + assert declarative_stream["properties"]["primary_key"]["anyOf"][1]["items"]["items"]["type"] == "string" + assert declarative_stream["properties"]["primary_key"]["anyOf"][2]["type"] == "string" + assert declarative_stream["properties"]["stream_cursor_field"]["type"] == "array" + assert declarative_stream["properties"]["stream_cursor_field"]["items"]["type"] == "string" + assert 
declarative_stream["properties"]["transformations"]["type"] == "array" + assert {"$ref": "#/definitions/AddFields"} in declarative_stream["properties"]["transformations"]["items"]["anyOf"] + assert {"$ref": "#/definitions/RemoveFields"} in declarative_stream["properties"]["transformations"]["items"]["anyOf"] + assert declarative_stream["properties"]["checkpoint_interval"]["type"] == "integer" + + simple_retriever = schema["definitions"]["SimpleRetriever"] + assert simple_retriever["required"] == ["requester", "record_selector"] + assert simple_retriever["properties"]["requester"]["$ref"] == "#/definitions/HttpRequester" + assert simple_retriever["properties"]["record_selector"]["$ref"] == "#/definitions/RecordSelector" + assert simple_retriever["properties"]["name"]["type"] == "string" + assert simple_retriever["properties"]["primary_key"]["anyOf"][0]["type"] == "array" + assert simple_retriever["properties"]["primary_key"]["anyOf"][0]["items"]["type"] == "string" + assert simple_retriever["properties"]["primary_key"]["anyOf"][1]["type"] == "array" + assert simple_retriever["properties"]["primary_key"]["anyOf"][1]["items"]["type"] == "array" + assert simple_retriever["properties"]["primary_key"]["anyOf"][1]["items"]["items"]["type"] == "string" + assert simple_retriever["properties"]["primary_key"]["anyOf"][2]["type"] == "string" + assert {"$ref": "#/definitions/LimitPaginator"} in simple_retriever["properties"]["paginator"]["anyOf"] + assert {"$ref": "#/definitions/NoPagination"} in simple_retriever["properties"]["paginator"]["anyOf"] + assert {"$ref": "#/definitions/CartesianProductStreamSlicer"} in simple_retriever["properties"]["stream_slicer"]["anyOf"] + assert {"$ref": "#/definitions/DatetimeStreamSlicer"} in simple_retriever["properties"]["stream_slicer"]["anyOf"] + assert {"$ref": "#/definitions/ListStreamSlicer"} in simple_retriever["properties"]["stream_slicer"]["anyOf"] + assert {"$ref": "#/definitions/SingleSlice"} in 
simple_retriever["properties"]["stream_slicer"]["anyOf"] + assert {"$ref": "#/definitions/SubstreamSlicer"} in simple_retriever["properties"]["stream_slicer"]["anyOf"] + + http_requester = schema["definitions"]["HttpRequester"] + assert http_requester["required"] == ["name", "url_base", "path", "config"] + assert http_requester["properties"]["name"]["type"] == "string" + assert {"$ref": "#/definitions/InterpolatedString"} in http_requester["properties"]["url_base"]["anyOf"] + assert {"type": "string"} in http_requester["properties"]["path"]["anyOf"] + assert {"$ref": "#/definitions/InterpolatedString"} in http_requester["properties"]["url_base"]["anyOf"] + assert {"type": "string"} in http_requester["properties"]["path"]["anyOf"] + assert http_requester["properties"]["http_method"]["anyOf"][0]["type"] == "string" + assert http_requester["properties"]["http_method"]["anyOf"][1]["type"] == "string" + assert "GET" in http_requester["properties"]["http_method"]["anyOf"][1]["enum"] + assert "POST" in http_requester["properties"]["http_method"]["anyOf"][1]["enum"] + assert http_requester["properties"]["request_options_provider"]["$ref"] == "#/definitions/InterpolatedRequestOptionsProvider" + assert {"$ref": "#/definitions/DeclarativeOauth2Authenticator"} in http_requester["properties"]["authenticator"]["anyOf"] + assert {"$ref": "#/definitions/ApiKeyAuthenticator"} in http_requester["properties"]["authenticator"]["anyOf"] + assert {"$ref": "#/definitions/BearerAuthenticator"} in http_requester["properties"]["authenticator"]["anyOf"] + assert {"$ref": "#/definitions/BasicHttpAuthenticator"} in http_requester["properties"]["authenticator"]["anyOf"] + assert {"$ref": "#/definitions/CompositeErrorHandler"} in http_requester["properties"]["error_handler"]["anyOf"] + assert {"$ref": "#/definitions/DefaultErrorHandler"} in http_requester["properties"]["error_handler"]["anyOf"] + + api_key_authenticator = schema["definitions"]["ApiKeyAuthenticator"] + assert 
api_key_authenticator["required"] == ["header", "api_token", "config"] + assert {"$ref": "#/definitions/InterpolatedString"} in api_key_authenticator["properties"]["header"]["anyOf"] + assert {"type": "string"} in api_key_authenticator["properties"]["header"]["anyOf"] + assert {"$ref": "#/definitions/InterpolatedString"} in api_key_authenticator["properties"]["api_token"]["anyOf"] + assert {"type": "string"} in api_key_authenticator["properties"]["api_token"]["anyOf"] + + default_error_handler = schema["definitions"]["DefaultErrorHandler"] + assert default_error_handler["properties"]["response_filters"]["type"] == "array" + assert default_error_handler["properties"]["response_filters"]["items"]["$ref"] == "#/definitions/HttpResponseFilter" + assert default_error_handler["properties"]["max_retries"]["type"] == "integer" + assert default_error_handler["properties"]["backoff_strategies"]["type"] == "array" + assert {"$ref": "#/definitions/ConstantBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"]["items"]["anyOf"] + assert {"$ref": "#/definitions/ExponentialBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"]["items"][ + "anyOf" + ] + assert {"$ref": "#/definitions/WaitTimeFromHeaderBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"][ + "items" + ]["anyOf"] + assert {"$ref": "#/definitions/WaitUntilTimeFromHeaderBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"][ + "items" + ]["anyOf"] + + exponential_backoff_strategy = schema["definitions"]["ExponentialBackoffStrategy"] + assert exponential_backoff_strategy["properties"]["factor"]["type"] == "number" + + limit_paginator = schema["definitions"]["LimitPaginator"] + assert limit_paginator["required"] == ["page_size", "limit_option", "page_token_option", "pagination_strategy", "config", "url_base"] + assert limit_paginator["properties"]["page_size"]["type"] == "integer" + assert 
limit_paginator["properties"]["limit_option"]["$ref"] == "#/definitions/RequestOption" + assert limit_paginator["properties"]["page_token_option"]["$ref"] == "#/definitions/RequestOption" + assert {"$ref": "#/definitions/CursorPaginationStrategy"} in limit_paginator["properties"]["pagination_strategy"]["anyOf"] + assert {"$ref": "#/definitions/OffsetIncrement"} in limit_paginator["properties"]["pagination_strategy"]["anyOf"] + assert {"$ref": "#/definitions/PageIncrement"} in limit_paginator["properties"]["pagination_strategy"]["anyOf"] + assert limit_paginator["properties"]["decoder"]["$ref"] == "#/definitions/JsonDecoder" + assert {"$ref": "#/definitions/InterpolatedString"} in http_requester["properties"]["url_base"]["anyOf"] + assert {"type": "string"} in http_requester["properties"]["path"]["anyOf"] + + cursor_pagination_stategy = schema["definitions"]["CursorPaginationStrategy"] + assert cursor_pagination_stategy["required"] == ["cursor_value", "config"] + assert {"$ref": "#/definitions/InterpolatedString"} in cursor_pagination_stategy["properties"]["cursor_value"]["anyOf"] + assert {"type": "string"} in cursor_pagination_stategy["properties"]["cursor_value"]["anyOf"] + assert cursor_pagination_stategy["properties"]["stop_condition"]["$ref"] == "#/definitions/InterpolatedBoolean" + assert cursor_pagination_stategy["properties"]["decoder"]["$ref"] == "#/definitions/JsonDecoder" + + list_stream_slicer = schema["definitions"]["ListStreamSlicer"] + assert list_stream_slicer["required"] == ["slice_values", "cursor_field", "config"] + assert {"type": "array", "items": {"type": "string"}} in list_stream_slicer["properties"]["slice_values"]["anyOf"] + assert {"type": "string"} in list_stream_slicer["properties"]["slice_values"]["anyOf"] + assert {"$ref": "#/definitions/InterpolatedString"} in list_stream_slicer["properties"]["cursor_field"]["anyOf"] + assert {"type": "string"} in list_stream_slicer["properties"]["cursor_field"]["anyOf"] + assert 
list_stream_slicer["properties"]["request_option"]["$ref"] == "#/definitions/RequestOption" From 743fbaf0d4d36624948764ebb448629fe96984e4 Mon Sep 17 00:00:00 2001 From: brianjlai Date: Mon, 15 Aug 2022 22:34:00 -0700 Subject: [PATCH 21/28] test for schema version --- .../sources/declarative/yaml_declarative_source.py | 8 +++----- .../sources/declarative/test_yaml_declarative_source.py | 4 +++- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py index fba68afe77332..f6674cd80d914 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py @@ -112,16 +112,14 @@ def expand_schema_interfaces(expand_class: type, visited: dict) -> type: visited[expand_class.__name__] = expand_class next_classes = [] - copy_cls = type(expand_class.__name__, expand_class.__bases__, dict(expand_class.__dict__)) - class_fields = fields(copy_cls) + class_fields = fields(expand_class) for field in class_fields: unpacked_field_types = DeclarativeComponentFactory.unpack(field.type) - copy_cls.__annotations__[field.name] = unpacked_field_types + expand_class.__annotations__[field.name] = unpacked_field_types next_classes.extend(YamlDeclarativeSource._get_next_expand_classes(field.type)) - for next_class in next_classes: YamlDeclarativeSource.expand_schema_interfaces(next_class, visited) - return copy_cls + return expand_class @staticmethod def _get_next_expand_classes(field_type) -> list[type]: diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py index 16196375a5979..b64ee73dca691 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py +++ 
b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py @@ -248,7 +248,9 @@ def test_generate_schema(): schema_str = YamlDeclarativeSource.generate_schema() schema = json.loads(schema_str) - assert schema["required"] == ["checker", "streams"] + assert "version" in schema["required"] + assert "checker" in schema["required"] + assert "streams" in schema["required"] assert schema["properties"]["checker"]["$ref"] == "#/definitions/CheckStream" assert schema["properties"]["streams"]["items"]["$ref"] == "#/definitions/DeclarativeStream" From 4eb1cc9672e13e257e1101a8df476253990f77d7 Mon Sep 17 00:00:00 2001 From: brianjlai Date: Thu, 18 Aug 2022 00:37:21 -0700 Subject: [PATCH 22/28] update tests now that interfaces are jsonschemamixin --- .../declarative/yaml_declarative_source.py | 8 +- .../test_yaml_declarative_source.py | 113 ++++++++++-------- 2 files changed, 67 insertions(+), 54 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py index f6674cd80d914..0019ec5975472 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py @@ -2,6 +2,7 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# +import inspect import json import logging import typing @@ -132,10 +133,9 @@ def _get_next_expand_classes(field_type) -> list[type]: """ generic_type = typing.get_origin(field_type) if generic_type is None: - module = field_type.__module__ - # We can only continue parsing declarative components since we explicitly inherit from the JsonSchemaMixin class which is - # used to generate the final json schema - if "airbyte_cdk.sources.declarative" in module and not isinstance(field_type, EnumMeta): + # We can only continue parsing declarative that inherit from the JsonSchemaMixin class because it is used + # to generate the final json schema + if inspect.isclass(field_type) and issubclass(field_type, JsonSchemaMixin) and not isinstance(field_type, EnumMeta): subclasses = field_type.__subclasses__() if subclasses: return subclasses diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py index b64ee73dca691..3740fbac3e060 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py @@ -1,6 +1,7 @@ # # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# + import json import os import tempfile @@ -255,39 +256,37 @@ def test_generate_schema(): assert schema["properties"]["streams"]["items"]["$ref"] == "#/definitions/DeclarativeStream" check_stream = schema["definitions"]["CheckStream"] - assert check_stream["required"] == ["stream_names"] + assert {"stream_names"}.issubset(check_stream["required"]) assert check_stream["properties"]["stream_names"]["type"] == "array" assert check_stream["properties"]["stream_names"]["items"]["type"] == "string" declarative_stream = schema["definitions"]["DeclarativeStream"] - assert declarative_stream["required"] == ["schema_loader", "retriever", "config"] + assert {"schema_loader", "retriever", "config"}.issubset(declarative_stream["required"]) assert declarative_stream["properties"]["schema_loader"]["$ref"] == "#/definitions/JsonSchema" assert declarative_stream["properties"]["retriever"]["$ref"] == "#/definitions/SimpleRetriever" assert declarative_stream["properties"]["name"]["type"] == "string" - assert declarative_stream["properties"]["primary_key"]["anyOf"][0]["type"] == "array" - assert declarative_stream["properties"]["primary_key"]["anyOf"][0]["items"]["type"] == "string" - assert declarative_stream["properties"]["primary_key"]["anyOf"][1]["type"] == "array" - assert declarative_stream["properties"]["primary_key"]["anyOf"][1]["items"]["type"] == "array" - assert declarative_stream["properties"]["primary_key"]["anyOf"][1]["items"]["items"]["type"] == "string" - assert declarative_stream["properties"]["primary_key"]["anyOf"][2]["type"] == "string" - assert declarative_stream["properties"]["stream_cursor_field"]["type"] == "array" - assert declarative_stream["properties"]["stream_cursor_field"]["items"]["type"] == "string" + assert {"type": "array", "items": {"type": "string"}} in declarative_stream["properties"]["primary_key"]["anyOf"] + assert {"type": "array", "items": {"type": "array", "items": {"type": "string"}}} in declarative_stream["properties"]["primary_key"][ + 
"anyOf" + ] + assert {"type": "string"} in declarative_stream["properties"]["primary_key"]["anyOf"] + assert {"type": "array", "items": {"type": "string"}} in declarative_stream["properties"]["stream_cursor_field"]["anyOf"] + assert {"type": "string"} in declarative_stream["properties"]["stream_cursor_field"]["anyOf"] assert declarative_stream["properties"]["transformations"]["type"] == "array" assert {"$ref": "#/definitions/AddFields"} in declarative_stream["properties"]["transformations"]["items"]["anyOf"] assert {"$ref": "#/definitions/RemoveFields"} in declarative_stream["properties"]["transformations"]["items"]["anyOf"] assert declarative_stream["properties"]["checkpoint_interval"]["type"] == "integer" - simple_retriever = schema["definitions"]["SimpleRetriever"] - assert simple_retriever["required"] == ["requester", "record_selector"] + simple_retriever = schema["definitions"]["SimpleRetriever"]["allOf"][1] + assert {"requester", "record_selector"}.issubset(simple_retriever["required"]) assert simple_retriever["properties"]["requester"]["$ref"] == "#/definitions/HttpRequester" assert simple_retriever["properties"]["record_selector"]["$ref"] == "#/definitions/RecordSelector" assert simple_retriever["properties"]["name"]["type"] == "string" - assert simple_retriever["properties"]["primary_key"]["anyOf"][0]["type"] == "array" - assert simple_retriever["properties"]["primary_key"]["anyOf"][0]["items"]["type"] == "string" - assert simple_retriever["properties"]["primary_key"]["anyOf"][1]["type"] == "array" - assert simple_retriever["properties"]["primary_key"]["anyOf"][1]["items"]["type"] == "array" - assert simple_retriever["properties"]["primary_key"]["anyOf"][1]["items"]["items"]["type"] == "string" - assert simple_retriever["properties"]["primary_key"]["anyOf"][2]["type"] == "string" + assert {"type": "array", "items": {"type": "string"}} in declarative_stream["properties"]["primary_key"]["anyOf"] + assert {"type": "array", "items": {"type": "array", "items": 
{"type": "string"}}} in declarative_stream["properties"]["primary_key"][ + "anyOf" + ] + assert {"type": "string"} in declarative_stream["properties"]["primary_key"]["anyOf"] assert {"$ref": "#/definitions/LimitPaginator"} in simple_retriever["properties"]["paginator"]["anyOf"] assert {"$ref": "#/definitions/NoPagination"} in simple_retriever["properties"]["paginator"]["anyOf"] assert {"$ref": "#/definitions/CartesianProductStreamSlicer"} in simple_retriever["properties"]["stream_slicer"]["anyOf"] @@ -296,17 +295,15 @@ def test_generate_schema(): assert {"$ref": "#/definitions/SingleSlice"} in simple_retriever["properties"]["stream_slicer"]["anyOf"] assert {"$ref": "#/definitions/SubstreamSlicer"} in simple_retriever["properties"]["stream_slicer"]["anyOf"] - http_requester = schema["definitions"]["HttpRequester"] - assert http_requester["required"] == ["name", "url_base", "path", "config"] + http_requester = schema["definitions"]["HttpRequester"]["allOf"][1] + assert {"name", "url_base", "path", "config"}.issubset(http_requester["required"]) assert http_requester["properties"]["name"]["type"] == "string" assert {"$ref": "#/definitions/InterpolatedString"} in http_requester["properties"]["url_base"]["anyOf"] assert {"type": "string"} in http_requester["properties"]["path"]["anyOf"] assert {"$ref": "#/definitions/InterpolatedString"} in http_requester["properties"]["url_base"]["anyOf"] assert {"type": "string"} in http_requester["properties"]["path"]["anyOf"] - assert http_requester["properties"]["http_method"]["anyOf"][0]["type"] == "string" - assert http_requester["properties"]["http_method"]["anyOf"][1]["type"] == "string" - assert "GET" in http_requester["properties"]["http_method"]["anyOf"][1]["enum"] - assert "POST" in http_requester["properties"]["http_method"]["anyOf"][1]["enum"] + assert {"type": "string"} in http_requester["properties"]["http_method"]["anyOf"] + assert {"type": "string", "enum": ["GET", "POST"]} in 
http_requester["properties"]["http_method"]["anyOf"] assert http_requester["properties"]["request_options_provider"]["$ref"] == "#/definitions/InterpolatedRequestOptionsProvider" assert {"$ref": "#/definitions/DeclarativeOauth2Authenticator"} in http_requester["properties"]["authenticator"]["anyOf"] assert {"$ref": "#/definitions/ApiKeyAuthenticator"} in http_requester["properties"]["authenticator"]["anyOf"] @@ -315,34 +312,23 @@ def test_generate_schema(): assert {"$ref": "#/definitions/CompositeErrorHandler"} in http_requester["properties"]["error_handler"]["anyOf"] assert {"$ref": "#/definitions/DefaultErrorHandler"} in http_requester["properties"]["error_handler"]["anyOf"] - api_key_authenticator = schema["definitions"]["ApiKeyAuthenticator"] - assert api_key_authenticator["required"] == ["header", "api_token", "config"] + api_key_authenticator = schema["definitions"]["ApiKeyAuthenticator"]["allOf"][1] + assert {"header", "api_token", "config"}.issubset(api_key_authenticator["required"]) assert {"$ref": "#/definitions/InterpolatedString"} in api_key_authenticator["properties"]["header"]["anyOf"] assert {"type": "string"} in api_key_authenticator["properties"]["header"]["anyOf"] assert {"$ref": "#/definitions/InterpolatedString"} in api_key_authenticator["properties"]["api_token"]["anyOf"] assert {"type": "string"} in api_key_authenticator["properties"]["api_token"]["anyOf"] - default_error_handler = schema["definitions"]["DefaultErrorHandler"] + default_error_handler = schema["definitions"]["DefaultErrorHandler"]["allOf"][1] assert default_error_handler["properties"]["response_filters"]["type"] == "array" assert default_error_handler["properties"]["response_filters"]["items"]["$ref"] == "#/definitions/HttpResponseFilter" assert default_error_handler["properties"]["max_retries"]["type"] == "integer" assert default_error_handler["properties"]["backoff_strategies"]["type"] == "array" - assert {"$ref": "#/definitions/ConstantBackoffStrategy"} in 
default_error_handler["properties"]["backoff_strategies"]["items"]["anyOf"] - assert {"$ref": "#/definitions/ExponentialBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"]["items"][ - "anyOf" - ] - assert {"$ref": "#/definitions/WaitTimeFromHeaderBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"][ - "items" - ]["anyOf"] - assert {"$ref": "#/definitions/WaitUntilTimeFromHeaderBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"][ - "items" - ]["anyOf"] - - exponential_backoff_strategy = schema["definitions"]["ExponentialBackoffStrategy"] - assert exponential_backoff_strategy["properties"]["factor"]["type"] == "number" - limit_paginator = schema["definitions"]["LimitPaginator"] - assert limit_paginator["required"] == ["page_size", "limit_option", "page_token_option", "pagination_strategy", "config", "url_base"] + limit_paginator = schema["definitions"]["LimitPaginator"]["allOf"][1] + assert {"page_size", "limit_option", "page_token_option", "pagination_strategy", "config", "url_base"}.issubset( + limit_paginator["required"] + ) assert limit_paginator["properties"]["page_size"]["type"] == "integer" assert limit_paginator["properties"]["limit_option"]["$ref"] == "#/definitions/RequestOption" assert limit_paginator["properties"]["page_token_option"]["$ref"] == "#/definitions/RequestOption" @@ -353,17 +339,44 @@ def test_generate_schema(): assert {"$ref": "#/definitions/InterpolatedString"} in http_requester["properties"]["url_base"]["anyOf"] assert {"type": "string"} in http_requester["properties"]["path"]["anyOf"] - cursor_pagination_stategy = schema["definitions"]["CursorPaginationStrategy"] - assert cursor_pagination_stategy["required"] == ["cursor_value", "config"] - assert {"$ref": "#/definitions/InterpolatedString"} in cursor_pagination_stategy["properties"]["cursor_value"]["anyOf"] - assert {"type": "string"} in cursor_pagination_stategy["properties"]["cursor_value"]["anyOf"] - assert 
cursor_pagination_stategy["properties"]["stop_condition"]["$ref"] == "#/definitions/InterpolatedBoolean" - assert cursor_pagination_stategy["properties"]["decoder"]["$ref"] == "#/definitions/JsonDecoder" + cursor_pagination_strategy = schema["definitions"]["CursorPaginationStrategy"]["allOf"][1] + assert {"cursor_value", "config"}.issubset(cursor_pagination_strategy["required"]) + assert {"$ref": "#/definitions/InterpolatedString"} in cursor_pagination_strategy["properties"]["cursor_value"]["anyOf"] + assert {"type": "string"} in cursor_pagination_strategy["properties"]["cursor_value"]["anyOf"] + assert {"$ref": "#/definitions/InterpolatedBoolean"} in cursor_pagination_strategy["properties"]["stop_condition"]["anyOf"] + assert {"type": "string"} in cursor_pagination_strategy["properties"]["stop_condition"]["anyOf"] + assert cursor_pagination_strategy["properties"]["decoder"]["$ref"] == "#/definitions/JsonDecoder" - list_stream_slicer = schema["definitions"]["ListStreamSlicer"] - assert list_stream_slicer["required"] == ["slice_values", "cursor_field", "config"] + list_stream_slicer = schema["definitions"]["ListStreamSlicer"]["allOf"][1] + assert {"slice_values", "cursor_field", "config"}.issubset(list_stream_slicer["required"]) assert {"type": "array", "items": {"type": "string"}} in list_stream_slicer["properties"]["slice_values"]["anyOf"] assert {"type": "string"} in list_stream_slicer["properties"]["slice_values"]["anyOf"] assert {"$ref": "#/definitions/InterpolatedString"} in list_stream_slicer["properties"]["cursor_field"]["anyOf"] assert {"type": "string"} in list_stream_slicer["properties"]["cursor_field"]["anyOf"] assert list_stream_slicer["properties"]["request_option"]["$ref"] == "#/definitions/RequestOption" + + added_field_definition = schema["definitions"]["AddedFieldDefinition"] + assert {"path", "value"}.issubset(added_field_definition["required"]) + assert added_field_definition["properties"]["path"]["type"] == "array" + assert 
added_field_definition["properties"]["path"]["items"]["type"] == "string" + assert {"$ref": "#/definitions/InterpolatedString"} in added_field_definition["properties"]["value"]["anyOf"] + assert {"type": "string"} in added_field_definition["properties"]["value"]["anyOf"] + + # There is something very strange about JsonSchemaMixin.json_schema(). For some reason, when this test is called independently + # it will pass. However, when it is invoked with the entire test, certain components won't get generated in the schema. Since + # the generate_schema() method is invoked by itself, this doesn't happen normally, but only in tests that are all called together + # One way to replicate this is add DefaultErrorHandler.json_schema() to the start of this test and uncomment the assertions below + + # assert {"$ref": "#/definitions/ConstantBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"]["items"]["anyOf"] + # assert {"$ref": "#/definitions/ExponentialBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"]["items"][ + # "anyOf" + # ] + # assert {"$ref": "#/definitions/WaitTimeFromHeaderBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"][ + # "items" + # ]["anyOf"] + # assert {"$ref": "#/definitions/WaitUntilTimeFromHeaderBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"][ + # "items" + # ]["anyOf"] + # + # exponential_backoff_strategy = schema["definitions"]["ExponentialBackoffStrategy"]["allOf"][1] + # assert exponential_backoff_strategy["properties"]["factor"]["type"] == "number" From 23d9dd1c6e689daf7e03e976ee9a3b93751055f2 Mon Sep 17 00:00:00 2001 From: brianjlai Date: Thu, 18 Aug 2022 00:40:11 -0700 Subject: [PATCH 23/28] accidentally removed a mixin --- .../requesters/paginators/strategies/pagination_strategy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py index 29cacb726483e..a2d9407a833dc 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py @@ -7,10 +7,11 @@ from typing import Any, List, Mapping, Optional import requests +from dataclasses_jsonschema import JsonSchemaMixin @dataclass -class PaginationStrategy: +class PaginationStrategy(JsonSchemaMixin): """ Defines how to get the next page token """ From 9476b5c51dceed1fd9f4d17af7bb1f3af8567e6e Mon Sep 17 00:00:00 2001 From: brianjlai Date: Thu, 18 Aug 2022 00:41:40 -0700 Subject: [PATCH 24/28] remove unneeded test --- .../python/unit_tests/sources/declarative/test_factory.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py index 61d4406494803..f6079fdaada48 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py @@ -39,7 +39,6 @@ from airbyte_cdk.sources.declarative.stream_slicers.list_stream_slicer import ListStreamSlicer from airbyte_cdk.sources.declarative.transformations import AddFields, RemoveFields from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition -from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource from jsonschema import ValidationError factory = DeclarativeComponentFactory() @@ -808,8 +807,3 @@ def test_validate_types_nested_in_list(): def test_unpack(test_name, input_type, expected_unpacked_types): actual_unpacked_types = 
DeclarativeComponentFactory.unpack(input_type) assert actual_unpacked_types == expected_unpacked_types - - -def test_complete_schema(): - schema = YamlDeclarativeSource.generate_schema() - print(schema) From b1da2c2a7c3250622be17b1a04604d152041c6b3 Mon Sep 17 00:00:00 2001 From: brianjlai Date: Thu, 18 Aug 2022 00:49:12 -0700 Subject: [PATCH 25/28] make comment a little more clear --- .../sources/declarative/test_yaml_declarative_source.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py index 3740fbac3e060..94a94d888b42d 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_yaml_declarative_source.py @@ -363,9 +363,10 @@ def test_generate_schema(): assert {"type": "string"} in added_field_definition["properties"]["value"]["anyOf"] # There is something very strange about JsonSchemaMixin.json_schema(). For some reason, when this test is called independently - # it will pass. However, when it is invoked with the entire test, certain components won't get generated in the schema. Since - # the generate_schema() method is invoked by itself, this doesn't happen normally, but only in tests that are all called together - # One way to replicate this is add DefaultErrorHandler.json_schema() to the start of this test and uncomment the assertions below + # it will pass. However, when it is invoked with the entire test file, certain components won't get generated in the schema. Since + # the generate_schema() method is invoked by independently so this doesn't happen under normal circumstance when we generate the + # complete schema. It only happens when the tests are all called together. 
+ # One way to replicate this is to add DefaultErrorHandler.json_schema() to the start of this test and uncomment the assertions below # assert {"$ref": "#/definitions/ConstantBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"]["items"]["anyOf"] # assert {"$ref": "#/definitions/ExponentialBackoffStrategy"} in default_error_handler["properties"]["backoff_strategies"]["items"][ From 364d2ca1fdccc2628468bcde4d3a3e157807439c Mon Sep 17 00:00:00 2001 From: brianjlai Date: Thu, 18 Aug 2022 12:33:08 -0700 Subject: [PATCH 26/28] generic enum not enum class --- .../sources/declarative/yaml_declarative_source.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py index 0019ec5975472..10a65f4945e16 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/yaml_declarative_source.py @@ -7,7 +7,7 @@ import logging import typing from dataclasses import dataclass, fields -from enum import EnumMeta +from enum import Enum, EnumMeta from typing import Any, List, Mapping, Union from airbyte_cdk.sources.declarative.checks import CheckStream @@ -17,7 +17,6 @@ from airbyte_cdk.sources.declarative.exceptions import InvalidConnectorDefinitionException from airbyte_cdk.sources.declarative.parsers.factory import DeclarativeComponentFactory from airbyte_cdk.sources.declarative.parsers.yaml_parser import YamlParser -from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod from airbyte_cdk.sources.streams.core import Stream from dataclasses_jsonschema import JsonSchemaMixin from jsonschema.validators import validate @@ -151,8 +150,6 @@ def _get_next_expand_classes(field_type) -> list[type]: class SchemaEncoder(json.JSONEncoder): def default(self, obj): - if isinstance(obj, property): 
- return str(obj) - elif isinstance(obj, HttpMethod): + if isinstance(obj, property) or isinstance(obj, Enum): return str(obj) return json.JSONEncoder.default(self, obj) From 89105fc02b3a8c6e9c1d05eaa4d42f6acb6b9d5b Mon Sep 17 00:00:00 2001 From: brianjlai Date: Thu, 18 Aug 2022 13:47:10 -0700 Subject: [PATCH 27/28] add generated json file and update docs to reference it --- .../declarative/config_component_schema.json | 1297 +++++++++++++++++ .../config-based/yaml-structure.md | 6 +- 2 files changed, 1302 insertions(+), 1 deletion(-) create mode 100644 airbyte-cdk/python/airbyte_cdk/sources/declarative/config_component_schema.json diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/config_component_schema.json b/airbyte-cdk/python/airbyte_cdk/sources/declarative/config_component_schema.json new file mode 100644 index 0000000000000..29ba32b36458d --- /dev/null +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/config_component_schema.json @@ -0,0 +1,1297 @@ +{ + "type": "object", + "required": ["version", "checker", "streams"], + "properties": { + "version": { + "type": "string" + }, + "checker": { + "$ref": "#/definitions/CheckStream" + }, + "streams": { + "type": "array", + "items": { + "$ref": "#/definitions/DeclarativeStream" + } + } + }, + "description": "ConcreteDeclarativeSource(version: str, checker: airbyte_cdk.sources.declarative.checks.check_stream.CheckStream, streams: List[airbyte_cdk.sources.declarative.declarative_stream.DeclarativeStream])", + "$schema": "http://json-schema.org/draft-06/schema#", + "definitions": { + "CheckStream": { + "type": "object", + "required": ["stream_names"], + "properties": { + "stream_names": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "description": "\n Checks the connections by trying to read records from one or many of the streams selected by the developer\n\n Attributes:\n stream_name (List[str]): name of streams to read records from\n " + }, + "DeclarativeStream": { + "type": 
"object", + "required": ["schema_loader", "retriever", "config"], + "properties": { + "schema_loader": { + "$ref": "#/definitions/JsonSchema" + }, + "retriever": { + "$ref": "#/definitions/SimpleRetriever" + }, + "config": { + "type": "object" + }, + "name": { + "type": "string", + "default": "" + }, + "_name": { + "type": "string", + "default": "" + }, + "primary_key": { + "anyOf": [ + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "string" + } + } + }, + { + "type": "string" + } + ], + "default": "" + }, + "_primary_key": { + "type": "string", + "default": "" + }, + "stream_cursor_field": { + "anyOf": [ + { "type": "array", "items": { "type": "string" } }, + { "type": "string" } + ] + }, + "transformations": { + "type": "array", + "items": { + "anyOf": [ + { "$ref": "#/definitions/AddFields" }, + { "$ref": "#/definitions/RemoveFields" } + ] + } + }, + "checkpoint_interval": { + "type": "integer" + } + }, + "description": "\n DeclarativeStream is a Stream that delegates most of its logic to its schema_load and retriever\n\n Attributes:\n name (str): stream name\n primary_key (Optional[Union[str, List[str], List[List[str]]]]): the primary key of the stream\n schema_loader (SchemaLoader): The schema loader\n retriever (Retriever): The retriever\n config (Config): The user-provided configuration as specified by the source's spec\n stream_cursor_field (Optional[List[str]]): The cursor field\n transformations (List[RecordTransformation]): A list of transformations to be applied to each output record in the\n stream. 
Transformations are applied in the order in which they are defined.\n checkpoint_interval (Optional[int]): How often the stream will checkpoint state (i.e: emit a STATE message)\n " + }, + "JsonSchema": { + "allOf": [ + { + "$ref": "#/definitions/SchemaLoader" + }, + { + "type": "object", + "required": ["file_path", "config"], + "properties": { + "file_path": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ] + }, + "config": { + "type": "object" + } + } + } + ], + "description": "\n Loads the schema from a json file\n\n Attributes:\n file_path (Union[InterpolatedString, str]): The path to the json file describing the schema\n name (str): The stream's name\n config (Config): The user-provided configuration as specified by the source's spec\n options (Mapping[str, Any]): Additional arguments to pass to the string interpolation if needed\n " + }, + "InterpolatedString": { + "type": "object", + "required": ["string"], + "properties": { + "string": { + "type": "string" + }, + "default": { + "type": "string" + } + }, + "description": "\n Wrapper around a raw string to be interpolated with the Jinja2 templating engine\n\n Attributes:\n string (str): The string to evalute\n default (Optional[str]): The default value to return if the evaluation returns an empty string\n options (Mapping[str, Any]): Additional runtime parameters to be used for string interpolation\n " + }, + "SchemaLoader": { + "type": "object", + "properties": {}, + "description": "Describes a stream's schema" + }, + "SimpleRetriever": { + "allOf": [ + { + "$ref": "#/definitions/Retriever" + }, + { + "type": "object", + "required": ["requester", "record_selector"], + "properties": { + "requester": { + "$ref": "#/definitions/HttpRequester" + }, + "record_selector": { + "$ref": "#/definitions/RecordSelector" + }, + "name": { + "type": "string", + "default": "" + }, + "_name": { + "type": "string", + "default": "" + }, + "primary_key": { + "anyOf": [ + { + "type": 
"array", + "items": { + "type": "string" + } + }, + { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "string" + } + } + }, + { + "type": "string" + } + ], + "default": "" + }, + "_primary_key": { + "type": "string", + "default": "" + }, + "paginator": { + "anyOf": [ + { "$ref": "#/definitions/LimitPaginator" }, + { "$ref": "#/definitions/NoPagination" } + ] + }, + "stream_slicer": { + "anyOf": [ + { + "$ref": "#/definitions/CartesianProductStreamSlicer" + }, + { + "$ref": "#/definitions/DatetimeStreamSlicer" + }, + { + "$ref": "#/definitions/ListStreamSlicer" + }, + { + "$ref": "#/definitions/SingleSlice" + }, + { + "$ref": "#/definitions/SubstreamSlicer" + } + ], + "default": {} + } + } + } + ], + "description": "\n Retrieves records by synchronously sending requests to fetch records.\n\n The retriever acts as an orchestrator between the requester, the record selector, the paginator, and the stream slicer.\n\n For each stream slice, submit requests until there are no more pages of records to fetch.\n\n This retriever currently inherits from HttpStream to reuse the request submission and pagination machinery.\n As a result, some of the parameters passed to some methods are unused.\n The two will be decoupled in a future release.\n\n Attributes:\n stream_name (str): The stream's name\n stream_primary_key (Optional[Union[str, List[str], List[List[str]]]]): The stream's primary key\n requester (Requester): The HTTP requester\n record_selector (HttpSelector): The record selector\n paginator (Optional[Paginator]): The paginator\n stream_slicer (Optional[StreamSlicer]): The stream slicer\n options (Mapping[str, Any]): Additional runtime parameters to be used for string interpolation\n " + }, + "HttpRequester": { + "allOf": [ + { + "$ref": "#/definitions/Requester" + }, + { + "type": "object", + "required": ["name", "url_base", "path", "config"], + "properties": { + "name": { + "type": "string" + }, + "url_base": { + "anyOf": [ + { "$ref": 
"#/definitions/InterpolatedString" }, + { "type": "string" } + ] + }, + "path": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ] + }, + "config": { + "type": "object" + }, + "http_method": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "string", + "enum": ["GET", "POST"] + } + ], + "default": "HttpMethod.GET" + }, + "request_options_provider": { + "$ref": "#/definitions/InterpolatedRequestOptionsProvider" + }, + "authenticator": { + "anyOf": [ + { + "$ref": "#/definitions/NoAuth" + }, + { + "$ref": "#/definitions/DeclarativeOauth2Authenticator" + }, + { + "$ref": "#/definitions/ApiKeyAuthenticator" + }, + { + "$ref": "#/definitions/BearerAuthenticator" + }, + { + "$ref": "#/definitions/BasicHttpAuthenticator" + } + ] + }, + "error_handler": { + "anyOf": [ + { "$ref": "#/definitions/CompositeErrorHandler" }, + { "$ref": "#/definitions/DefaultErrorHandler" } + ] + } + } + } + ], + "description": "\n Default implementation of a Requester\n\n Attributes:\n name (str): Name of the stream. 
Only used for request/response caching\n url_base (Union[InterpolatedString, str]): Base url to send requests to\n path (Union[InterpolatedString, str]): Path to send requests to\n http_method (Union[str, HttpMethod]): HTTP method to use when sending requests\n request_options_provider (Optional[InterpolatedRequestOptionsProvider]): request option provider defining the options to set on outgoing requests\n authenticator (DeclarativeAuthenticator): Authenticator defining how to authenticate to the source\n error_handler (Optional[ErrorHandler]): Error handler defining how to detect and handle errors\n config (Config): The user-provided configuration as specified by the source's spec\n " + }, + "InterpolatedRequestOptionsProvider": { + "allOf": [ + { + "$ref": "#/definitions/RequestOptionsProvider" + }, + { + "type": "object", + "properties": { + "config": { + "type": "object", + "default": {} + }, + "request_parameters": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { "type": "string" } + }, + { "type": "string" } + ] + }, + "request_headers": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { "type": "string" } + }, + { "type": "string" } + ] + }, + "request_body_data": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { "type": "string" } + }, + { "type": "string" } + ] + }, + "request_body_json": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { "type": "string" } + }, + { "type": "string" } + ] + } + } + } + ], + "description": "\n Defines the request options to set on an outgoing HTTP request by evaluating `InterpolatedMapping`s\n\n Attributes:\n config (Config): The user-provided configuration as specified by the source's spec\n request_parameters (Union[str, Mapping[str, str]]): The request parameters to set on an outgoing HTTP request\n request_headers (Union[str, Mapping[str, str]]): The request headers to set on an outgoing HTTP request\n request_body_data (Union[str, Mapping[str, str]]): 
The body data to set on an outgoing HTTP request\n request_body_json (Union[str, Mapping[str, str]]): The json content to set on an outgoing HTTP request\n " + }, + "RequestOptionsProvider": { + "type": "object", + "properties": {}, + "description": "\n Defines the request options to set on an outgoing HTTP request\n\n Options can be passed by\n - request parameter\n - request headers\n - body data\n - json content\n " + }, + "NoAuth": { + "allOf": [ + { "$ref": "#/definitions/DeclarativeAuthenticator" }, + { "type": "object", "properties": {} } + ], + "description": "NoAuth()" + }, + "DeclarativeAuthenticator": { + "type": "object", + "properties": {}, + "description": "\n Interface used to associate which authenticators can be used as part of the declarative framework\n " + }, + "DeclarativeOauth2Authenticator": { + "allOf": [ + { + "$ref": "#/definitions/DeclarativeAuthenticator" + }, + { + "type": "object", + "required": [ + "token_refresh_endpoint", + "client_id", + "client_secret", + "refresh_token", + "config" + ], + "properties": { + "token_refresh_endpoint": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ] + }, + "client_id": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ] + }, + "client_secret": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ] + }, + "refresh_token": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ] + }, + "config": { + "type": "object" + }, + "scopes": { + "type": "array", + "items": { + "type": "string" + } + }, + "token_expiry_date": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ] + }, + "_token_expiry_date": {}, + "access_token_name": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ], + "default": "access_token" + }, + "expires_in_name": { + "anyOf": [ + { "$ref": 
"#/definitions/InterpolatedString" }, + { "type": "string" } + ], + "default": "expires_in" + }, + "refresh_request_body": { + "type": "object" + } + } + } + ], + "description": "\n Generates OAuth2.0 access tokens from an OAuth2.0 refresh token and client credentials based on\n a declarative connector configuration file. Credentials can be defined explicitly or via interpolation\n at runtime. The generated access token is attached to each request via the Authorization header.\n\n Attributes:\n token_refresh_endpoint (Union[InterpolatedString, str]): The endpoint to refresh the access token\n client_id (Union[InterpolatedString, str]): The client id\n client_secret (Union[InterpolatedString, str]): Client secret\n refresh_token (Union[InterpolatedString, str]): The token used to refresh the access token\n access_token_name (Union[InterpolatedString, str]): THe field to extract access token from in the response\n expires_in_name (Union[InterpolatedString, str]): The field to extract expires_in from in the response\n config (Mapping[str, Any]): The user-provided configuration as specified by the source's spec\n scopes (Optional[List[str]]): The scopes to request\n token_expiry_date (Optional[Union[InterpolatedString, str]]): The access token expiration date\n refresh_request_body (Optional[Mapping[str, Any]]): The request body to send in the refresh request\n " + }, + "ApiKeyAuthenticator": { + "allOf": [ + { + "$ref": "#/definitions/DeclarativeAuthenticator" + }, + { + "type": "object", + "required": ["header", "api_token", "config"], + "properties": { + "header": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ] + }, + "api_token": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ] + }, + "config": { + "type": "object" + } + } + } + ], + "description": "\n ApiKeyAuth sets a request header on the HTTP requests sent.\n\n The header is of the form:\n `\"
\": \"\"`\n\n For example,\n `ApiKeyAuthenticator(\"Authorization\", \"Bearer hello\")`\n will result in the following header set on the HTTP request\n `\"Authorization\": \"Bearer hello\"`\n\n Attributes:\n header (Union[InterpolatedString, str]): Header key to set on the HTTP requests\n api_token (Union[InterpolatedString, str]): Header value to set on the HTTP requests\n config (Config): The user-provided configuration as specified by the source's spec\n options (Mapping[str, Any]): Additional runtime parameters to be used for string interpolation\n " + }, + "BearerAuthenticator": { + "allOf": [ + { + "$ref": "#/definitions/DeclarativeAuthenticator" + }, + { + "type": "object", + "required": ["api_token", "config"], + "properties": { + "api_token": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ] + }, + "config": { + "type": "object" + } + } + } + ], + "description": "\n Authenticator that sets the Authorization header on the HTTP requests sent.\n\n The header is of the form:\n `\"Authorization\": \"Bearer \"`\n\n Attributes:\n api_token (Union[InterpolatedString, str]): The bearer token\n config (Config): The user-provided configuration as specified by the source's spec\n options (Mapping[str, Any]): Additional runtime parameters to be used for string interpolation\n " + }, + "BasicHttpAuthenticator": { + "allOf": [ + { + "$ref": "#/definitions/DeclarativeAuthenticator" + }, + { + "type": "object", + "required": ["username", "config"], + "properties": { + "username": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ] + }, + "config": { + "type": "object" + }, + "password": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ], + "default": "" + } + } + } + ], + "description": "\n Builds auth based off the basic authentication scheme as defined by RFC 7617, which transmits credentials as USER ID/password pairs, encoded using base64\n 
https://developer.mozilla.org/en-US/docs/Web/HTTP/Authentication#basic_authentication_scheme\n\n The header is of the form\n `\"Authorization\": \"Basic \"`\n\n Attributes:\n username (Union[InterpolatedString, str]): The username\n config (Config): The user-provided configuration as specified by the source's spec\n password (Union[InterpolatedString, str]): The password\n options (Mapping[str, Any]): Additional runtime parameters to be used for string interpolation\n " + }, + "CompositeErrorHandler": { + "allOf": [ + { + "$ref": "#/definitions/ErrorHandler" + }, + { + "type": "object", + "required": ["error_handlers"], + "properties": { + "error_handlers": { + "type": "array", + "items": { + "anyOf": [ + { "$ref": "#/definitions/CompositeErrorHandler" }, + { "$ref": "#/definitions/DefaultErrorHandler" } + ] + } + } + } + } + ], + "description": "\n Error handler that sequentially iterates over a list of `ErrorHandler`s\n\n Sample config chaining 2 different retriers:\n error_handler:\n type: \"CompositeErrorHandler\"\n error_handlers:\n - response_filters:\n - predicate: \"{{ 'codase' in response }}\"\n action: RETRY\n backoff_strategies:\n - type: \"ConstantBackoffStrategy\"\n backoff_time_in_seconds: 5\n - response_filters:\n - http_codes: [403]\n action: RETRY\n backoff_strategies:\n - type: \"ConstantBackoffStrategy\"\n backoff_time_in_seconds: 10\n Attributes:\n error_handlers (List[ErrorHandler]): list of error handlers\n " + }, + "DefaultErrorHandler": { + "allOf": [ + { + "$ref": "#/definitions/ErrorHandler" + }, + { + "type": "object", + "properties": { + "response_filters": { + "type": "array", + "items": { + "$ref": "#/definitions/HttpResponseFilter" + } + }, + "max_retries": { + "type": "integer", + "default": "" + }, + "_max_retries": { + "type": "integer", + "default": 5 + }, + "backoff_strategies": { + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/ConstantBackoffStrategy" + }, + { + "$ref": 
"#/definitions/ExponentialBackoffStrategy" + }, + { + "$ref": "#/definitions/WaitTimeFromHeaderBackoffStrategy" + }, + { + "$ref": "#/definitions/WaitUntilTimeFromHeaderBackoffStrategy" + } + ] + } + } + } + } + ], + "description": "\n Default error handler.\n\n By default, the handler will only retry server errors (HTTP 5XX) and too many requests (HTTP 429) with exponential backoff.\n\n If the response is successful, then return SUCCESS\n Otherwise, iterate over the response_filters.\n If any of the filter match the response, then return the appropriate status.\n If the match is RETRY, then iterate sequentially over the backoff_strategies and return the first non-None backoff time.\n\n Sample configs:\n\n 1. retry 10 times\n `\n error_handler:\n max_retries: 10\n `\n 2. backoff for 5 seconds\n `\n error_handler:\n backoff_strategies:\n - type: \"ConstantBackoffStrategy\"\n backoff_time_in_seconds: 5\n `\n 3. retry on HTTP 404\n `\n error_handler:\n response_filters:\n - http_codes: [404]\n action: RETRY\n `\n 4. ignore HTTP 404\n `\n error_handler:\n - http_codes: [404]\n action: IGNORE\n `\n 5. retry if error message contains `retrythisrequest!` substring\n `\n error_handler:\n response_filters:\n - error_message_contain: \"retrythisrequest!\"\n action: IGNORE\n `\n 6. retry if 'code' is a field present in the response body\n `\n error_handler:\n response_filters:\n - predicate: \"{{ 'code' in response }}\"\n action: IGNORE\n `\n\n 7. 
ignore 429 and retry on 404\n `\n error_handler:\n - http_codes: [429]\n action: IGNORE\n - http_codes: [404]\n action: RETRY\n `\n\n Attributes:\n response_filters (Optional[List[HttpResponseFilter]]): response filters to iterate on\n max_retries (Optional[int]): maximum retry attempts\n backoff_strategies (Optional[List[BackoffStrategy]]): list of backoff strategies to use to determine how long\n to wait before retrying\n " + }, + "HttpResponseFilter": { + "type": "object", + "required": ["action"], + "properties": { + "action": { + "anyOf": [ + { + "type": "string", + "enum": ["SUCCESS", "FAIL", "IGNORE", "RETRY"] + }, + { + "type": "string" + } + ] + }, + "http_codes": { + "type": "array", + "items": { + "type": "integer" + }, + "uniqueItems": true + }, + "error_message_contains": { + "type": "string" + }, + "predicate": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedBoolean" }, + { "type": "string" } + ], + "default": "" + } + }, + "description": "\n Filter to select HttpResponses\n\n Attributes:\n action (Union[ResponseAction, str]): action to execute if a request matches\n http_codes (Set[int]): http code of matching requests\n error_message_contains (str): error substring of matching requests\n predicate (str): predicate to apply to determine if a request is matching\n " + }, + "InterpolatedBoolean": { + "type": "object", + "required": ["condition"], + "properties": { + "condition": { + "type": "string" + } + }, + "description": "InterpolatedBoolean(condition: str, options: dataclasses.InitVar[typing.Mapping[str, typing.Any]])" + }, + "ConstantBackoffStrategy": { + "allOf": [ + { + "$ref": "#/definitions/BackoffStrategy" + }, + { + "type": "object", + "required": ["backoff_time_in_seconds"], + "properties": { + "backoff_time_in_seconds": { + "type": "number" + } + } + } + ], + "description": "\n Backoff strategy with a constant backoff interval\n\n Attributes:\n backoff_time_in_seconds (float): time to backoff before retrying a retryable request.\n " 
+ }, + "BackoffStrategy": { + "type": "object", + "properties": {}, + "description": "\n Backoff strategy defining how long to wait before retrying a request that resulted in an error.\n " + }, + "ExponentialBackoffStrategy": { + "allOf": [ + { "$ref": "#/definitions/BackoffStrategy" }, + { + "type": "object", + "properties": { "factor": { "type": "number", "default": 5 } } + } + ], + "description": "\n Backoff strategy with an exponential backoff interval\n\n Attributes:\n factor (float): multiplicative factor\n " + }, + "WaitTimeFromHeaderBackoffStrategy": { + "allOf": [ + { + "$ref": "#/definitions/BackoffStrategy" + }, + { + "type": "object", + "required": ["header"], + "properties": { + "header": { + "type": "string" + }, + "regex": { + "type": "string" + } + } + } + ], + "description": "\n Extract wait time from http header\n\n Attributes:\n header (str): header to read wait time from\n regex (Optional[str]): optional regex to apply on the header to extract its value\n " + }, + "WaitUntilTimeFromHeaderBackoffStrategy": { + "allOf": [ + { + "$ref": "#/definitions/BackoffStrategy" + }, + { + "type": "object", + "required": ["header"], + "properties": { + "header": { + "type": "string" + }, + "min_wait": { + "type": "number" + }, + "regex": { + "type": "string" + } + } + } + ], + "description": "\n Extract time at which we can retry the request from response header\n and wait for the difference between now and that time\n\n Attributes:\n header (str): header to read wait time from\n min_wait (Optional[float]): minimum time to wait for safety\n regex (Optional[str]): optional regex to apply on the header to extract its value\n " + }, + "ErrorHandler": { + "type": "object", + "properties": {}, + "description": "\n Defines whether a request was successful and how to handle a failure.\n " + }, + "Requester": { + "allOf": [ + { "$ref": "#/definitions/RequestOptionsProvider" }, + { "type": "object", "properties": {} } + ] + }, + "RecordSelector": { + "allOf": [ + { + 
"$ref": "#/definitions/HttpSelector" + }, + { + "type": "object", + "required": ["extractor"], + "properties": { + "extractor": { + "$ref": "#/definitions/DpathExtractor" + }, + "record_filter": { + "$ref": "#/definitions/RecordFilter" + } + } + } + ], + "description": "\n Responsible for translating an HTTP response into a list of records by extracting records from the response and optionally filtering\n records based on a heuristic.\n\n Attributes:\n extractor (RecordExtractor): The record extractor responsible for extracting records from a response\n record_filter (RecordFilter): The record filter responsible for filtering extracted records\n " + }, + "DpathExtractor": { + "allOf": [ + { + "$ref": "#/definitions/RecordExtractor" + }, + { + "type": "object", + "required": ["field_pointer", "config"], + "properties": { + "field_pointer": { + "type": "array", + "items": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ] + } + }, + "config": { + "type": "object" + }, + "decoder": { + "$ref": "#/definitions/JsonDecoder", + "default": {} + } + } + } + ], + "description": "\n Record extractor that searches a decoded response over a path defined as an array of fields.\n\n If the field pointer points to an array, that array is returned.\n If the field pointer points to an object, that object is returned wrapped as an array.\n If the field pointer points to an empty object, an empty array is returned.\n If the field pointer points to a non-existing path, an empty array is returned.\n\n Examples of instantiating this transform:\n ```\n extractor:\n type: DpathExtractor\n field_pointer:\n - \"root\"\n - \"data\"\n ```\n\n ```\n extractor:\n type: DpathExtractor\n field_pointer:\n - \"root\"\n - \"{{ options['field'] }}\"\n ```\n\n ```\n extractor:\n type: DpathExtractor\n field_pointer: []\n ```\n\n Attributes:\n transform (Union[InterpolatedString, str]): Pointer to the field that should be extracted\n config (Config): The 
user-provided configuration as specified by the source's spec\n decoder (Decoder): The decoder responsible to transfom the response in a Mapping\n " + }, + "JsonDecoder": { + "allOf": [ + { "$ref": "#/definitions/Decoder" }, + { "type": "object", "properties": {} } + ], + "description": "\n Decoder strategy that returns the json-encoded content of a response, if any.\n " + }, + "Decoder": { + "type": "object", + "properties": {}, + "description": "\n Decoder strategy to transform a requests.Response into a Mapping[str, Any]\n " + }, + "RecordExtractor": { + "type": "object", + "properties": {}, + "description": "\n Responsible for translating an HTTP response into a list of records by extracting records from the response.\n " + }, + "RecordFilter": { + "type": "object", + "required": ["config"], + "properties": { + "config": { + "type": "object" + }, + "condition": { + "type": "string", + "default": "" + } + }, + "description": "\n Filter applied on a list of Records\n\n config (Config): The user-provided configuration as specified by the source's spec\n condition (str): The string representing the predicate to filter a record. 
Records will be removed if evaluated to False\n " + }, + "HttpSelector": { + "type": "object", + "properties": {}, + "description": "\n Responsible for translating an HTTP response into a list of records by extracting records from the response and optionally filtering\n records based on a heuristic.\n " + }, + "LimitPaginator": { + "allOf": [ + { + "$ref": "#/definitions/Paginator" + }, + { + "type": "object", + "required": [ + "page_size", + "limit_option", + "page_token_option", + "pagination_strategy", + "config", + "url_base" + ], + "properties": { + "page_size": { + "type": "integer" + }, + "limit_option": { + "$ref": "#/definitions/RequestOption" + }, + "page_token_option": { + "$ref": "#/definitions/RequestOption" + }, + "pagination_strategy": { + "anyOf": [ + { + "$ref": "#/definitions/CursorPaginationStrategy" + }, + { + "$ref": "#/definitions/OffsetIncrement" + }, + { + "$ref": "#/definitions/PageIncrement" + } + ] + }, + "config": { + "type": "object" + }, + "url_base": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ] + }, + "decoder": { + "$ref": "#/definitions/JsonDecoder", + "default": {} + }, + "_token": {} + } + } + ], + "description": "\n Limit paginator to request pages of results with a fixed size until the pagination strategy no longer returns a next_page_token\n\n Examples:\n 1.\n * fetches up to 10 records at a time by setting the \"limit\" request param to 10\n * updates the request path with \"{{ response._metadata.next }}\"\n paginator:\n type: \"LimitPaginator\"\n page_size: 10\n limit_option:\n inject_into: request_parameter\n field_name: page_size\n page_token_option:\n option_type: path\n pagination_strategy:\n type: \"CursorPagination\"\n cursor_value: \"{{ response._metadata.next }}\"\n `\n\n 2.\n * fetches up to 5 records at a time by setting the \"page_size\" header to 5\n * increments a record counter and set the request parameter \"offset\" to the value of the counter\n `\n paginator:\n 
type: \"LimitPaginator\"\n page_size: 5\n limit_option:\n inject_into: header\n field_name: page_size\n pagination_strategy:\n type: \"OffsetIncrement\"\n page_token:\n option_type: \"request_parameter\"\n field_name: \"offset\"\n `\n\n 3.\n * fetches up to 5 records at a time by setting the \"page_size\" request param to 5\n * increments a page counter and set the request parameter \"page\" to the value of the counter\n `\n paginator:\n type: \"LimitPaginator\"\n page_size: 5\n limit_option:\n inject_into: request_parameter\n field_name: page_size\n pagination_strategy:\n type: \"PageIncrement\"\n page_token:\n option_type: \"request_parameter\"\n field_name: \"page\"\n\n Attributes:\n page_size (int): the number of records to request\n limit_option (RequestOption): the request option to set the limit. Cannot be injected in the path.\n page_token_option (RequestOption): the request option to set the page token\n pagination_strategy (PaginationStrategy): Strategy defining how to get the next page token\n config (Config): connection config\n url_base (Union[InterpolatedString, str]): endpoint's base url\n decoder (Decoder): decoder to decode the response\n " + }, + "RequestOption": { + "type": "object", + "required": ["inject_into"], + "properties": { + "inject_into": { + "type": "string", + "enum": [ + "request_parameter", + "header", + "path", + "body_data", + "body_json" + ] + }, + "field_name": { + "type": "string" + } + }, + "description": "\n Describes an option to set on a request\n\n Attributes:\n inject_into (RequestOptionType): Describes where in the HTTP request to inject the parameter\n field_name (Optional[str]): Describes the name of the parameter to inject. None if option_type == path. 
Required otherwise.\n " + }, + "CursorPaginationStrategy": { + "allOf": [ + { + "$ref": "#/definitions/PaginationStrategy" + }, + { + "type": "object", + "required": ["cursor_value", "config"], + "properties": { + "cursor_value": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ] + }, + "config": { + "type": "object" + }, + "stop_condition": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedBoolean" }, + { "type": "string" } + ] + }, + "decoder": { + "$ref": "#/definitions/JsonDecoder", + "default": {} + } + } + } + ], + "description": "\n Pagination strategy that evaluates an interpolated string to define the next page token\n\n Attributes:\n cursor_value (Union[InterpolatedString, str]): template string evaluating to the cursor value\n config (Config): connection config\n stop_condition (Optional[InterpolatedBoolean]): template string evaluating when to stop paginating\n decoder (Decoder): decoder to decode the response\n " + }, + "PaginationStrategy": { + "type": "object", + "properties": {}, + "description": "\n Defines how to get the next page token\n " + }, + "OffsetIncrement": { + "allOf": [ + { + "$ref": "#/definitions/PaginationStrategy" + }, + { + "type": "object", + "required": ["page_size"], + "properties": { + "page_size": { + "type": "integer" + } + } + } + ], + "description": "\n Pagination strategy that returns the number of records read so far as the next page token\n\n Attributes:\n page_size (int): the number of records to request\n " + }, + "PageIncrement": { + "allOf": [ + { + "$ref": "#/definitions/PaginationStrategy" + }, + { + "type": "object", + "required": ["page_size"], + "properties": { + "page_size": { + "type": "integer" + } + } + } + ], + "description": "\n Pagination strategy that returns the number of pages read so far as the next page token\n\n Attributes:\n page_size (int): the number of records to request\n " + }, + "Paginator": { + "allOf": [ + {
"$ref": "#/definitions/RequestOptionsProvider" }, + { "type": "object", "properties": {} } + ], + "description": "\n Defines the token to use to fetch the next page of records from the API.\n\n If needed, the Paginator will set request options to be set on the HTTP request to fetch the next page of records.\n If the next_page_token is the path to the next page of records, then it should be accessed through the `path` method\n " + }, + "NoPagination": { + "allOf": [ + { "$ref": "#/definitions/Paginator" }, + { "type": "object", "properties": {} } + ], + "description": "\n Pagination implementation that never returns a next page.\n " + }, + "CartesianProductStreamSlicer": { + "allOf": [ + { + "$ref": "#/definitions/StreamSlicer" + }, + { + "type": "object", + "required": ["stream_slicers"], + "properties": { + "stream_slicers": { + "type": "array", + "items": { + "anyOf": [ + { + "$ref": "#/definitions/CartesianProductStreamSlicer" + }, + { + "$ref": "#/definitions/DatetimeStreamSlicer" + }, + { + "$ref": "#/definitions/ListStreamSlicer" + }, + { + "$ref": "#/definitions/SingleSlice" + }, + { + "$ref": "#/definitions/SubstreamSlicer" + } + ] + } + } + } + } + ], + "description": "\n Stream slicers that iterates over the cartesian product of input stream slicers\n Given 2 stream slicers with the following slices:\n A: [{\"i\": 0}, {\"i\": 1}, {\"i\": 2}]\n B: [{\"s\": \"hello\"}, {\"s\": \"world\"}]\n the resulting stream slices are\n [\n {\"i\": 0, \"s\": \"hello\"},\n {\"i\": 0, \"s\": \"world\"},\n {\"i\": 1, \"s\": \"hello\"},\n {\"i\": 1, \"s\": \"world\"},\n {\"i\": 2, \"s\": \"hello\"},\n {\"i\": 2, \"s\": \"world\"},\n ]\n\n Attributes:\n stream_slicers (List[StreamSlicer]): Underlying stream slicers. The RequestOptions (e.g: Request headers, parameters, etc..) returned by this slicer are the combination of the RequestOptions of its input slicers. 
If there are conflicts e.g: two slicers define the same header or request param, the conflict is resolved by taking the value from the first slicer, where ordering is determined by the order in which slicers were input to this composite slicer.\n " + }, + "DatetimeStreamSlicer": { + "allOf": [ + { + "$ref": "#/definitions/StreamSlicer" + }, + { + "type": "object", + "required": [ + "start_datetime", + "end_datetime", + "step", + "cursor_field", + "datetime_format", + "config" + ], + "properties": { + "start_datetime": { + "anyOf": [ + { "$ref": "#/definitions/MinMaxDatetime" }, + { "type": "string" } + ] + }, + "end_datetime": { + "anyOf": [ + { "$ref": "#/definitions/MinMaxDatetime" }, + { "type": "string" } + ] + }, + "step": { + "type": "string" + }, + "cursor_field": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ] + }, + "datetime_format": { + "type": "string" + }, + "config": { + "type": "object" + }, + "_cursor": { + "type": "object" + }, + "_cursor_end": { + "type": "object" + }, + "start_time_option": { + "$ref": "#/definitions/RequestOption" + }, + "end_time_option": { + "$ref": "#/definitions/RequestOption" + }, + "stream_state_field_start": { + "type": "string" + }, + "stream_state_field_end": { + "type": "string" + }, + "lookback_window": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ] + } + } + } + ], + "description": "\n Slices the stream over a datetime range.\n\n Given a start time, end time, a step function, and an optional lookback window,\n the stream slicer will partition the date range from start time - lookback window to end time.\n\n The step function is defined as a string of the form:\n `\"<number><unit>\"`\n\n where unit can be one of\n - weeks, w\n - days, d\n\n For example, \"1d\" will produce windows of 1 day, and 2weeks windows of 2 weeks.\n\n The timestamp format accepts the same format codes as datetime.strftime, which are\n all the format codes required by
the 1989 C standard.\n Full list of accepted format codes: https://man7.org/linux/man-pages/man3/strftime.3.html\n\n Attributes:\n start_datetime (Union[MinMaxDatetime, str]): the datetime that determines the earliest record that should be synced\n end_datetime (Union[MinMaxDatetime, str]): the datetime that determines the last record that should be synced\n step (str): size of the timewindow\n cursor_field (Union[InterpolatedString, str]): record's cursor field\n datetime_format (str): format of the datetime\n config (Config): connection config\n start_time_option (Optional[RequestOption]): request option for start time\n end_time_option (Optional[RequestOption]): request option for end time\n stream_state_field_start (Optional[str]): stream slice start time field\n stream_state_field_end (Optional[str]): stream slice end time field\n lookback_window (Optional[InterpolatedString]): how many days before start_datetime to read data for\n " + }, + "MinMaxDatetime": { + "type": "object", + "required": ["datetime"], + "properties": { + "datetime": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ] + }, + "datetime_format": { + "type": "string", + "default": "" + }, + "_datetime_format": { + "type": "string", + "default": "" + }, + "min_datetime": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ], + "default": "" + }, + "max_datetime": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ], + "default": "" + } + }, + "description": "\n Compares the provided date against optional minimum or maximum times. If date is earlier than\n min_date, then min_date is returned. 
If date is greater than max_date, then max_date is returned.\n If neither, the input date is returned.\n\n The timestamp format accepts the same format codes as datetime.strftime, which are\n all the format codes required by the 1989 C standard.\n Full list of accepted format codes: https://man7.org/linux/man-pages/man3/strftime.3.html\n\n Attributes:\n datetime (Union[InterpolatedString, str]): InterpolatedString or string representing the datetime in the format specified by `datetime_format`\n datetime_format (str): Format of the datetime passed as argument\n min_datetime (Union[InterpolatedString, str]): Represents the minimum allowed datetime value.\n max_datetime (Union[InterpolatedString, str]): Represents the maximum allowed datetime value.\n " + }, + "StreamSlicer": { + "allOf": [ + { "$ref": "#/definitions/RequestOptionsProvider" }, + { "type": "object", "properties": {} } + ], + "description": "\n Slices the stream into a subset of records.\n Slices enable state checkpointing and data retrieval parallelization.\n\n The stream slicer keeps track of the cursor state as a dict of cursor_field -> cursor_value\n\n See the stream slicing section of the docs for more information.\n " + }, + "ListStreamSlicer": { + "allOf": [ + { + "$ref": "#/definitions/StreamSlicer" + }, + { + "type": "object", + "required": ["slice_values", "cursor_field", "config"], + "properties": { + "slice_values": { + "anyOf": [ + { "type": "array", "items": { "type": "string" } }, + { "type": "string" } + ] + }, + "cursor_field": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ] + }, + "config": { + "type": "object" + }, + "request_option": { + "$ref": "#/definitions/RequestOption" + } + } + } + ], + "description": "\n Stream slicer that iterates over the values of a list\n If slice_values is a string, then evaluate it as literal and assert the resulting literal is a list\n\n Attributes:\n slice_values (Union[str, List[str]]): The values to
iterate over\n cursor_field (Union[InterpolatedString, str]): The name of the cursor field\n config (Config): The user-provided configuration as specified by the source's spec\n request_option (Optional[RequestOption]): The request option to configure the HTTP request\n " + }, + "SingleSlice": { + "allOf": [ + { "$ref": "#/definitions/StreamSlicer" }, + { "type": "object", "properties": {} } + ], + "description": "Stream slicer returning only a single stream slice" + }, + "SubstreamSlicer": { + "allOf": [ + { + "$ref": "#/definitions/StreamSlicer" + }, + { + "type": "object", + "required": ["parent_stream_configs"], + "properties": { + "parent_stream_configs": { + "type": "array", + "items": { + "$ref": "#/definitions/ParentStreamConfig" + } + } + } + } + ], + "description": "\n Stream slicer that iterates over the parent's stream slices and records and emits slices by interpolating the slice_definition mapping\n Will populate the state with `parent_stream_slice` and `parent_record` so they can be accessed by other components\n\n Attributes:\n parent_stream_configs (List[ParentStreamConfig]): parent streams to iterate over and their config\n " + }, + "ParentStreamConfig": { + "type": "object", + "required": ["stream", "parent_key", "stream_slice_field"], + "properties": { + "stream": {}, + "parent_key": { + "type": "string" + }, + "stream_slice_field": { + "type": "string" + }, + "request_option": { + "$ref": "#/definitions/RequestOption" + } + }, + "description": "\n Describes how to create a stream slice from a parent stream\n\n stream: The stream to read records from\n parent_key: The key of the parent stream's records that will be the stream slice key\n stream_slice_field: The stream slice key\n request_option: How to inject the slice value on an outgoing HTTP request\n " + }, + "Retriever": { + "type": "object", + "properties": {}, + "description": "\n Responsible for fetching a stream's records from an HTTP API source.\n " + }, + "AddFields": { + "allOf": [ + 
{ + "$ref": "#/definitions/RecordTransformation" + }, + { + "type": "object", + "required": ["fields"], + "properties": { + "fields": { + "type": "array", + "items": { + "$ref": "#/definitions/AddedFieldDefinition" + } + }, + "_parsed_fields": { + "type": "array", + "items": { + "$ref": "#/definitions/ParsedAddFieldDefinition" + }, + "default": [] + } + } + } + ], + "description": "\n Transformation which adds field to an output record. The path of the added field can be nested. Adding nested fields will create all\n necessary parent objects (like mkdir -p). Adding fields to an array will extend the array to that index (filling intermediate\n indices with null values). So if you add a field at index 5 to the array [\"value\"], it will become [\"value\", null, null, null, null,\n \"new_value\"].\n\n\n This transformation has access to the following contextual values:\n record: the record about to be output by the connector\n config: the input configuration provided to a connector\n stream_state: the current state of the stream\n stream_slice: the current stream slice being read\n\n\n\n Examples of instantiating this transformation via YAML:\n - type: AddFields\n fields:\n # hardcoded constant\n - path: [\"path\"]\n value: \"static_value\"\n\n # nested path\n - path: [\"path\", \"to\", \"field\"]\n value: \"static\"\n\n # from config\n - path: [\"shop_id\"]\n value: \"{{ config.shop_id }}\"\n\n # from state\n - path: [\"current_state\"]\n value: \"{{ stream_state.cursor_field }}\" # Or {{ stream_state['cursor_field'] }}\n\n # from record\n - path: [\"unnested_value\"]\n value: {{ record.nested.field }}\n\n # from stream_slice\n - path: [\"start_date\"]\n value: {{ stream_slice.start_date }}\n\n # by supplying any valid Jinja template directive or expression https://jinja.palletsprojects.com/en/3.1.x/templates/#\n - path: [\"two_times_two\"]\n value: {{ 2 * 2 }}\n\n Attributes:\n fields (List[AddedFieldDefinition]): A list of transformations (path and corresponding 
value) that will be added to the record\n " + }, + "AddedFieldDefinition": { + "type": "object", + "required": ["path", "value"], + "properties": { + "path": { + "type": "array", + "items": { + "type": "string" + } + }, + "value": { + "anyOf": [ + { "$ref": "#/definitions/InterpolatedString" }, + { "type": "string" } + ] + } + }, + "description": "Defines the field to add on a record" + }, + "ParsedAddFieldDefinition": { + "type": "object", + "required": ["path", "value"], + "properties": { + "path": { + "type": "array", + "items": { + "type": "string" + } + }, + "value": { + "$ref": "#/definitions/InterpolatedString" + } + }, + "description": "Defines the field to add on a record" + }, + "RecordTransformation": { + "type": "object", + "properties": {}, + "description": "\n Implementations of this class define transformations that can be applied to records of a stream.\n " + }, + "RemoveFields": { + "allOf": [ + { + "$ref": "#/definitions/RecordTransformation" + }, + { + "type": "object", + "required": ["field_pointers"], + "properties": { + "field_pointers": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + } + ], + "description": "\n A transformation which removes fields from a record. The fields removed are designated using FieldPointers.\n During transformation, if a field or any of its parents does not exist in the record, no error is thrown.\n\n If an input field pointer references an item in a list (e.g: [\"k\", 0] in the object {\"k\": [\"a\", \"b\", \"c\"]}) then\n the object at that index is set to None rather than being removed from the list entirely.
TODO change this behavior.\n\n It's possible to remove objects nested in lists e.g: removing [\".\", 0, \"k\"] from {\".\": [{\"k\": \"V\"}]} results in {\".\": [{}]}\n\n Usage syntax:\n\n ```yaml\n my_stream:\n \n transformations:\n - type: RemoveFields\n field_pointers:\n - [\"path\", \"to\", \"field1\"]\n - [\"path2\"]\n ```\n\n Attributes:\n field_pointers (List[FieldPointer]): pointers to the fields that should be removed\n " + } + } +} diff --git a/docs/connector-development/config-based/yaml-structure.md b/docs/connector-development/config-based/yaml-structure.md index 32c9843ca4d2f..06be2a0a1917b 100644 --- a/docs/connector-development/config-based/yaml-structure.md +++ b/docs/connector-development/config-based/yaml-structure.md @@ -270,4 +270,8 @@ For example, The macros available can be found [here](https://github.com/airbytehq/airbyte/blob/master/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/macros.py). -Additional information on jinja templating can be found at https://jinja.palletsprojects.com/en/3.1.x/templates/# \ No newline at end of file +Additional information on jinja templating can be found at https://jinja.palletsprojects.com/en/3.1.x/templates/# + +## Configuration Component Schema Reference + +A JSON schema representation of the relationships between the components that can be used in your YAML configuration can be found [here](https://github.com/airbytehq/airbyte/blob/master/airbyte-cdk/python/airbyte_cdk/sources/declarative/config_component_schema.json) \ No newline at end of file From 63d395da839f1705567e2775f14c31ea7597bea0 Mon Sep 17 00:00:00 2001 From: brianjlai Date: Thu, 18 Aug 2022 13:50:23 -0700 Subject: [PATCH 28/28] verbage --- docs/connector-development/config-based/yaml-structure.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/connector-development/config-based/yaml-structure.md b/docs/connector-development/config-based/yaml-structure.md index 06be2a0a1917b..c51df7167c369 100644 
--- a/docs/connector-development/config-based/yaml-structure.md +++ b/docs/connector-development/config-based/yaml-structure.md @@ -272,6 +272,6 @@ The macros available can be found [here](https://github.com/airbytehq/airbyte/bl Additional information on jinja templating can be found at https://jinja.palletsprojects.com/en/3.1.x/templates/# -## Configuration Component Schema Reference +## Component schema reference -A JSON schema representation of the relationships between the components that can be used in your YAML configuration can be found [here](https://github.com/airbytehq/airbyte/blob/master/airbyte-cdk/python/airbyte_cdk/sources/declarative/config_component_schema.json) \ No newline at end of file +A JSON schema representation of the relationships between the components that can be used in the YAML configuration can be found [here](https://github.com/airbytehq/airbyte/blob/master/airbyte-cdk/python/airbyte_cdk/sources/declarative/config_component_schema.json). \ No newline at end of file