diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/oauth.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/oauth.py index d20864f47eb18..ff9d5ef8b104a 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/oauth.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/oauth.py @@ -2,146 +2,94 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # +from dataclasses import InitVar, dataclass, field from typing import Any, List, Mapping, Optional, Union import pendulum from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_oauth import AbstractOauth2Authenticator +from dataclasses_jsonschema import JsonSchemaMixin -class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator): +@dataclass +class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, JsonSchemaMixin): """ Generates OAuth2.0 access tokens from an OAuth2.0 refresh token and client credentials based on a declarative connector configuration file. Credentials can be defined explicitly or via interpolation at runtime. The generated access token is attached to each request via the Authorization header. 
+ + Attributes: + token_refresh_endpoint (Union[InterpolatedString, str]): The endpoint to refresh the access token + client_id (Union[InterpolatedString, str]): The client id + client_secret (Union[InterpolatedString, str]): Client secret + refresh_token (Union[InterpolatedString, str]): The token used to refresh the access token + access_token_name (Union[InterpolatedString, str]): THe field to extract access token from in the response + expires_in_name (Union[InterpolatedString, str]): The field to extract expires_in from in the response + config (Mapping[str, Any]): The user-provided configuration as specified by the source's spec + scopes (Optional[List[str]]): The scopes to request + token_expiry_date (Optional[Union[InterpolatedString, str]]): The access token expiration date + refresh_request_body (Optional[Mapping[str, Any]]): The request body to send in the refresh request """ - def __init__( - self, - token_refresh_endpoint: Union[InterpolatedString, str], - client_id: Union[InterpolatedString, str], - client_secret: Union[InterpolatedString, str], - refresh_token: Union[InterpolatedString, str], - config: Mapping[str, Any], - scopes: Optional[List[str]] = None, - token_expiry_date: Optional[Union[InterpolatedString, str]] = None, - access_token_name: Union[InterpolatedString, str] = "access_token", - expires_in_name: Union[InterpolatedString, str] = "expires_in", - refresh_request_body: Optional[Mapping[str, Any]] = None, - **options: Optional[Mapping[str, Any]], - ): - """ - :param token_refresh_endpoint: The endpoint to refresh the access token - :param client_id: The client id - :param client_secret: Client secret - :param refresh_token: The token used to refresh the access token - :param config: The user-provided configuration as specified by the source's spec - :param scopes: The scopes to request - :param token_expiry_date: The access token expiration date - :param access_token_name: THe field to extract access token from in the response - :param 
expires_in_name:The field to extract expires_in from in the response - :param refresh_request_body: The request body to send in the refresh request - :param options: Additional runtime parameters to be used for string interpolation - """ - self.config = config - self.token_refresh_endpoint = InterpolatedString.create(token_refresh_endpoint, options=options) - self.client_secret = InterpolatedString.create(client_secret, options=options) - self.client_id = InterpolatedString.create(client_id, options=options) - self.refresh_token = InterpolatedString.create(refresh_token, options=options) - self.scopes = scopes - self.access_token_name = InterpolatedString.create(access_token_name, options=options) - self.expires_in_name = InterpolatedString.create(expires_in_name, options=options) - self.refresh_request_body = InterpolatedMapping(refresh_request_body or {}, options=options) - - self.token_expiry_date = ( - pendulum.parse(InterpolatedString.create(token_expiry_date, options=options).eval(self.config)) - if token_expiry_date + token_refresh_endpoint: Union[InterpolatedString, str] + client_id: Union[InterpolatedString, str] + client_secret: Union[InterpolatedString, str] + refresh_token: Union[InterpolatedString, str] + config: Mapping[str, Any] + options: InitVar[Mapping[str, Any]] + scopes: Optional[List[str]] = None + token_expiry_date: Optional[Union[InterpolatedString, str]] = None + _token_expiry_date: pendulum.DateTime = field(init=False, repr=False) + access_token_name: Union[InterpolatedString, str] = "access_token" + expires_in_name: Union[InterpolatedString, str] = "expires_in" + refresh_request_body: Optional[Mapping[str, Any]] = None + + def __post_init__(self, options: Mapping[str, Any]): + self.token_refresh_endpoint = InterpolatedString.create(self.token_refresh_endpoint, options=options) + self.client_id = InterpolatedString.create(self.client_id, options=options) + self.client_secret = InterpolatedString.create(self.client_secret, options=options) + 
self.refresh_token = InterpolatedString.create(self.refresh_token, options=options) + self.access_token_name = InterpolatedString.create(self.access_token_name, options=options) + self.expires_in_name = InterpolatedString.create(self.expires_in_name, options=options) + self._refresh_request_body = InterpolatedMapping(self.refresh_request_body or {}, options=options) + self._token_expiry_date = ( + pendulum.parse(InterpolatedString.create(self.token_expiry_date, options=options).eval(self.config)) + if self.token_expiry_date else pendulum.now().subtract(days=1) ) - self.access_token = None - - @property - def config(self) -> Mapping[str, Any]: - return self._config - - @config.setter - def config(self, value: Mapping[str, Any]): - self._config = value - - @property - def token_refresh_endpoint(self) -> InterpolatedString: - get_some = self._token_refresh_endpoint.eval(self.config) - return get_some + self._access_token = None - @token_refresh_endpoint.setter - def token_refresh_endpoint(self, value: InterpolatedString): - self._token_refresh_endpoint = value + def get_token_refresh_endpoint(self) -> str: + return self.token_refresh_endpoint.eval(self.config) - @property - def client_id(self) -> InterpolatedString: - return self._client_id.eval(self.config) - - @client_id.setter - def client_id(self, value: InterpolatedString): - self._client_id = value + def get_client_id(self) -> str: + return self.client_id.eval(self.config) - @property - def client_secret(self) -> InterpolatedString: - return self._client_secret.eval(self.config) + def get_client_secret(self) -> str: + return self.client_secret.eval(self.config) - @client_secret.setter - def client_secret(self, value: InterpolatedString): - self._client_secret = value + def get_refresh_token(self) -> str: + return self.refresh_token.eval(self.config) - @property - def refresh_token(self) -> InterpolatedString: - return self._refresh_token.eval(self.config) + def get_scopes(self) -> [str]: + return self.scopes - 
@refresh_token.setter - def refresh_token(self, value: InterpolatedString): - self._refresh_token = value + def get_access_token_name(self) -> InterpolatedString: + return self.access_token_name.eval(self.config) - @property - def scopes(self) -> [str]: - return self._scopes + def get_expires_in_name(self) -> InterpolatedString: + return self.expires_in_name.eval(self.config) - @scopes.setter - def scopes(self, value: [str]): - self._scopes = value + def get_refresh_request_body(self) -> Mapping[str, Any]: + return self._refresh_request_body.eval(self.config) - @property - def token_expiry_date(self) -> pendulum.DateTime: + def get_token_expiry_date(self) -> pendulum.DateTime: return self._token_expiry_date - @token_expiry_date.setter - def token_expiry_date(self, value: pendulum.DateTime): + def set_token_expiry_date(self, value: pendulum.DateTime): self._token_expiry_date = value - @property - def access_token_name(self) -> InterpolatedString: - return self._access_token_name.eval(self.config) - - @access_token_name.setter - def access_token_name(self, value: InterpolatedString): - self._access_token_name = value - - @property - def expires_in_name(self) -> InterpolatedString: - return self._expires_in_name.eval(self.config) - - @expires_in_name.setter - def expires_in_name(self, value: InterpolatedString): - self._expires_in_name = value - - @property - def refresh_request_body(self) -> InterpolatedMapping: - return self._refresh_request_body.eval(self.config) - - @refresh_request_body.setter - def refresh_request_body(self, value: InterpolatedMapping): - self._refresh_request_body = value - @property def access_token(self) -> str: return self._access_token diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/token.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/token.py index 30520b03d9713..04790ae9e3036 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/token.py +++ 
b/airbyte-cdk/python/airbyte_cdk/sources/declarative/auth/token.py @@ -3,14 +3,17 @@ # import base64 -from typing import Any, Mapping, Optional, Union +from dataclasses import InitVar, dataclass +from typing import Any, Mapping, Union from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.types import Config from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_token import AbstractHeaderAuthenticator +from dataclasses_jsonschema import JsonSchemaMixin -class ApiKeyAuthenticator(AbstractHeaderAuthenticator): +@dataclass +class ApiKeyAuthenticator(AbstractHeaderAuthenticator, JsonSchemaMixin): """ ApiKeyAuth sets a request header on the HTTP requests sent. @@ -22,50 +25,51 @@ class ApiKeyAuthenticator(AbstractHeaderAuthenticator): will result in the following header set on the HTTP request `"Authorization": "Bearer hello"` + Attributes: + header (Union[InterpolatedString, str]): Header key to set on the HTTP requests + api_token (Union[InterpolatedString, str]): Header value to set on the HTTP requests + config (Config): The user-provided configuration as specified by the source's spec + options (Mapping[str, Any]): Additional runtime parameters to be used for string interpolation """ - def __init__( - self, - header: Union[InterpolatedString, str], - token: Union[InterpolatedString, str], - config: Config, - **options: Optional[Mapping[str, Any]], - ): - """ - :param header: Header key to set on the HTTP requests - :param token: Header value to set on the HTTP requests - :param config: The user-provided configuration as specified by the source's spec - :param options: Additional runtime parameters to be used for string interpolation - """ - self._header = InterpolatedString.create(header, options=options) - self._token = InterpolatedString.create(token, options=options) - self._config = config + header: Union[InterpolatedString, str] + api_token: Union[InterpolatedString, 
str] + config: Config + options: InitVar[Mapping[str, Any]] + + def __post_init__(self, options: Mapping[str, Any]): + self._header = InterpolatedString.create(self.header, options=options) + self._token = InterpolatedString.create(self.api_token, options=options) @property def auth_header(self) -> str: - return self._header.eval(self._config) + return self._header.eval(self.config) @property def token(self) -> str: - return self._token.eval(self._config) + return self._token.eval(self.config) -class BearerAuthenticator(AbstractHeaderAuthenticator): +@dataclass +class BearerAuthenticator(AbstractHeaderAuthenticator, JsonSchemaMixin): """ Authenticator that sets the Authorization header on the HTTP requests sent. The header is of the form: `"Authorization": "Bearer "` + + Attributes: + api_token (Union[InterpolatedString, str]): The bearer token + config (Config): The user-provided configuration as specified by the source's spec + options (Mapping[str, Any]): Additional runtime parameters to be used for string interpolation """ - def __init__(self, token: Union[InterpolatedString, str], config: Config, **options: Optional[Mapping[str, Any]]): - """ - :param token: The bearer token - :param config: The user-provided configuration as specified by the source's spec - :param options: Additional runtime parameters to be used for string interpolation - """ - self._token = InterpolatedString.create(token, options=options) - self._config = config + api_token: Union[InterpolatedString, str] + config: Config + options: InitVar[Mapping[str, Any]] + + def __post_init__(self, options: Mapping[str, Any]): + self._token = InterpolatedString.create(self.api_token, options=options) @property def auth_header(self) -> str: @@ -73,9 +77,10 @@ def auth_header(self) -> str: @property def token(self) -> str: - return f"Bearer {self._token.eval(self._config)}" + return f"Bearer {self._token.eval(self.config)}" +@dataclass class BasicHttpAuthenticator(AbstractHeaderAuthenticator): """ 
Builds auth based off the basic authentication scheme as defined by RFC 7617, which transmits credentials as USER ID/password pairs, encoded using bas64 @@ -83,24 +88,22 @@ class BasicHttpAuthenticator(AbstractHeaderAuthenticator): The header is of the form `"Authorization": "Basic "` + + Attributes: + username (Union[InterpolatedString, str]): The username + config (Config): The user-provided configuration as specified by the source's spec + password (Union[InterpolatedString, str]): The password + options (Mapping[str, Any]): Additional runtime parameters to be used for string interpolation """ - def __init__( - self, - username: Union[InterpolatedString, str], - config: Config, - password: Union[InterpolatedString, str] = "", - **options: Optional[Mapping[str, Any]], - ): - """ - :param username: The username - :param config: The user-provided configuration as specified by the source's spec - :param password: The password - :param options: Additional runtime parameters to be used for string interpolation - """ - self._username = InterpolatedString.create(username, options=options) - self._password = InterpolatedString.create(password, options=options) - self._config = config + username: Union[InterpolatedString, str] + config: Config + options: InitVar[Mapping[str, Any]] + password: Union[InterpolatedString, str] = "" + + def __post_init__(self, options): + self._username = InterpolatedString.create(self.username, options=options) + self._password = InterpolatedString.create(self.password, options=options) @property def auth_header(self) -> str: @@ -108,6 +111,6 @@ def auth_header(self) -> str: @property def token(self) -> str: - auth_string = f"{self._username.eval(self._config)}:{self._password.eval(self._config)}".encode("utf8") + auth_string = f"{self._username.eval(self.config)}:{self._password.eval(self.config)}".encode("utf8") b64_encoded = base64.b64encode(auth_string).decode("utf8") return f"Basic {b64_encoded}" diff --git 
a/airbyte-cdk/python/airbyte_cdk/sources/declarative/checks/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/checks/__init__.py index 1100c1c58cf51..fb6665d946251 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/checks/__init__.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/checks/__init__.py @@ -1,3 +1,8 @@ # # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # + +from airbyte_cdk.sources.declarative.checks.check_stream import CheckStream +from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker + +__all__ = ["CheckStream", "ConnectionChecker"] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/checks/check_stream.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/checks/check_stream.py index 47db5130ad96b..decf9fefc862a 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/checks/check_stream.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/checks/check_stream.py @@ -3,24 +3,28 @@ # import logging -from typing import Any, List, Mapping, Optional, Tuple +from dataclasses import InitVar, dataclass +from typing import Any, List, Mapping, Tuple from airbyte_cdk.models.airbyte_protocol import SyncMode from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker from airbyte_cdk.sources.source import Source +from dataclasses_jsonschema import JsonSchemaMixin -class CheckStream(ConnectionChecker): +@dataclass +class CheckStream(ConnectionChecker, JsonSchemaMixin): """ Checks the connections by trying to read records from one or many of the streams selected by the developer + + Attributes: + stream_name (List[str]): name of streams to read records from """ - def __init__(self, stream_names: List[str], **options: Optional[Mapping[str, Any]]): - """ - :param stream_names: name of streams to read records from - :param options: Additional runtime parameters to be used for string interpolation - """ - self._stream_names = 
set(stream_names) + stream_names: List[str] + options: InitVar[Mapping[str, Any]] + + def __post_init__(self, options: Mapping[str, Any]): self._options = options def check_connection(self, source: Source, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, any]: @@ -28,7 +32,7 @@ def check_connection(self, source: Source, logger: logging.Logger, config: Mappi stream_name_to_stream = {s.name: s for s in streams} if len(streams) == 0: return False, f"No streams to connect to from source {source}" - for stream_name in self._stream_names: + for stream_name in self.stream_names: if stream_name in stream_name_to_stream.keys(): stream = stream_name_to_stream[stream_name] try: diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/create_partial.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/create_partial.py index c4b9f4ac56191..c941153f3f84d 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/create_partial.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/create_partial.py @@ -41,8 +41,17 @@ def newfunc(*fargs, **fkeywords): kwargs_to_pass_down = _get_kwargs_to_pass_to_func(func, options) all_keywords_to_pass_down = _get_kwargs_to_pass_to_func(func, all_keywords) + + # options is required as part of creation of all declarative components + dynamic_args = {**all_keywords_to_pass_down, **kwargs_to_pass_down} + if "options" not in dynamic_args: + dynamic_args["options"] = {} + else: + # Handles the case where kwarg options and keyword $options both exist. 
We should merge both sets of options + # before creating the component + dynamic_args["options"] = {**all_keywords_to_pass_down["options"], **kwargs_to_pass_down["options"]} try: - ret = func(*args, *fargs, **{**all_keywords_to_pass_down, **kwargs_to_pass_down}) + ret = func(*args, *fargs, **dynamic_args) except TypeError as e: raise Exception(f"failed to create object of type {func} because {e}") return ret @@ -54,12 +63,14 @@ def newfunc(*fargs, **fkeywords): return newfunc -def _get_kwargs_to_pass_to_func(func, kwargs): +def _get_kwargs_to_pass_to_func(func, options): argspec = inspect.getfullargspec(func) kwargs_to_pass_down = set(argspec.kwonlyargs) args_to_pass_down = set(argspec.args) all_args = args_to_pass_down.union(kwargs_to_pass_down) - kwargs_to_pass_down = {k: v for k, v in kwargs.items() if k in all_args} + kwargs_to_pass_down = {k: v for k, v in options.items() if k in all_args} + if "options" in all_args: + kwargs_to_pass_down["options"] = options return kwargs_to_pass_down diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/__init__.py index 1100c1c58cf51..3832a103f6822 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/__init__.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/__init__.py @@ -1,3 +1,7 @@ # # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# + +from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime + +__all__ = ["MinMaxDatetime"] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py index 8f90766a5e253..d58ca36631c7d 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py @@ -3,38 +3,43 @@ # import datetime as dt -from typing import Any, Mapping, Optional, Union +from dataclasses import InitVar, dataclass, field +from typing import Any, Mapping, Union from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString +from dataclasses_jsonschema import JsonSchemaMixin -class MinMaxDatetime: +@dataclass +class MinMaxDatetime(JsonSchemaMixin): """ Compares the provided date against optional minimum or maximum times. If date is earlier than min_date, then min_date is returned. If date is greater than max_date, then max_date is returned. If neither, the input date is returned. + + Attributes: + datetime (Union[InterpolatedString, str]): InterpolatedString or string representing the datetime in the format specified by `datetime_format` + datetime_format (str): Format of the datetime passed as argument + min_datetime (Union[InterpolatedString, str]): Represents the minimum allowed datetime value. + max_datetime (Union[InterpolatedString, str]): Represents the maximum allowed datetime value. 
""" - def __init__( - self, - datetime: Union[InterpolatedString, str], - datetime_format: str = "", - min_datetime: Union[InterpolatedString, str] = "", - max_datetime: Union[InterpolatedString, str] = "", - **options: Optional[Mapping[str, Any]], - ): - """ - :param datetime: InterpolatedString or string representing the datetime in the format specified by `datetime_format` - :param datetime_format: Format of the datetime passed as argument - :param min_datetime: InterpolatedString or string representing the min datetime - :param max_datetime: InterpolatedString or string representing the max datetime - :param options: Additional runtime parameters to be used for string interpolation - """ - self._datetime_interpolator = InterpolatedString.create(datetime, options=options) - self._datetime_format = datetime_format + datetime: Union[InterpolatedString, str] + options: InitVar[Mapping[str, Any]] + # datetime_format is a unique case where we inherit it from the parent if it is not specified before using the default value + # which is why we need dedicated getter/setter methods and private dataclass field + datetime_format: str = "" + _datetime_format: str = field(init=False, repr=False, default="") + min_datetime: Union[InterpolatedString, str] = "" + max_datetime: Union[InterpolatedString, str] = "" + + def __post_init__(self, options: Mapping[str, Any]): + self.datetime = InterpolatedString.create(self.datetime, options=options or {}) + self.timezone = dt.timezone.utc + self.min_datetime = InterpolatedString.create(self.min_datetime, options=options) if self.min_datetime else None + self.max_datetime = InterpolatedString.create(self.max_datetime, options=options) if self.max_datetime else None + self._timezone = dt.timezone.utc - self._min_datetime_interpolator = InterpolatedString.create(min_datetime, options=options) if min_datetime else None - self._max_datetime_interpolator = InterpolatedString.create(max_datetime, options=options) if max_datetime else None 
def get_datetime(self, config, **additional_options) -> dt.datetime: """ @@ -48,19 +53,17 @@ def get_datetime(self, config, **additional_options) -> dt.datetime: if not datetime_format: datetime_format = "%Y-%m-%dT%H:%M:%S.%f%z" - time = dt.datetime.strptime(str(self._datetime_interpolator.eval(config, **additional_options)), datetime_format).replace( - tzinfo=self._timezone - ) + time = dt.datetime.strptime(str(self.datetime.eval(config, **additional_options)), datetime_format).replace(tzinfo=self._timezone) - if self._min_datetime_interpolator: - min_time = dt.datetime.strptime( - str(self._min_datetime_interpolator.eval(config, **additional_options)), datetime_format - ).replace(tzinfo=self._timezone) + if self.min_datetime: + min_time = dt.datetime.strptime(str(self.min_datetime.eval(config, **additional_options)), datetime_format).replace( + tzinfo=self._timezone + ) time = max(time, min_time) - if self._max_datetime_interpolator: - max_time = dt.datetime.strptime( - str(self._max_datetime_interpolator.eval(config, **additional_options)), datetime_format - ).replace(tzinfo=self._timezone) + if self.max_datetime: + max_time = dt.datetime.strptime(str(self.max_datetime.eval(config, **additional_options)), datetime_format).replace( + tzinfo=self._timezone + ) time = min(time, max_time) return time @@ -72,4 +75,7 @@ def datetime_format(self) -> str: @datetime_format.setter def datetime_format(self, value: str): """Setter for the datetime format""" - self._datetime_format = value + # Covers the case where datetime_format is not provided in the constructor, which causes the property object + # to be set which we need to avoid doing + if not isinstance(value, property): + self._datetime_format = value diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py index fce2e20b8e58e..feae3fa4d51a6 100644 --- 
a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_stream.py @@ -2,7 +2,7 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # - +from dataclasses import InitVar, dataclass, field from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union from airbyte_cdk.models import SyncMode @@ -11,46 +11,51 @@ from airbyte_cdk.sources.declarative.transformations import RecordTransformation from airbyte_cdk.sources.declarative.types import Config, StreamSlice from airbyte_cdk.sources.streams.core import Stream +from dataclasses_jsonschema import JsonSchemaMixin -class DeclarativeStream(Stream): +@dataclass +class DeclarativeStream(Stream, JsonSchemaMixin): """ DeclarativeStream is a Stream that delegates most of its logic to its schema_load and retriever + + Attributes: + stream_name (str): stream name + stream_primary_key (Optional[Union[str, List[str], List[List[str]]]]): the primary key of the stream + schema_loader (SchemaLoader): The schema loader + retriever (Retriever): The retriever + config (Config): The user-provided configuration as specified by the source's spec + stream_cursor_field (Optional[List[str]]): The cursor field + transformations (List[RecordTransformation]): A list of transformations to be applied to each output record in the + stream. Transformations are applied in the order in which they are defined. 
+ checkpoint_interval (Optional[int]): How often the stream will checkpoint state (i.e: emit a STATE message) """ - def __init__( - self, - name: str, - primary_key, - schema_loader: SchemaLoader, - retriever: Retriever, - config: Config, - cursor_field: Optional[List[str]] = None, - transformations: List[RecordTransformation] = None, - checkpoint_interval: Optional[int] = None, - ): - """ - :param name: stream name - :param primary_key: the primary key of the stream - :param schema_loader: The schema loader - :param retriever: The retriever - :param cursor_field: The cursor field - :param transformations: A list of transformations to be applied to each output record in the stream. Transformations are applied - in the order in which they are defined. - """ - self._name = name - self._config = config - self._primary_key = primary_key - self._cursor_field = cursor_field or [] - self._schema_loader = schema_loader - self._retriever = retriever - self._transformations = transformations or [] - self._checkpoint_interval = checkpoint_interval + schema_loader: SchemaLoader + retriever: Retriever + config: Config + options: InitVar[Mapping[str, Any]] + name: str + _name: str = field(init=False, repr=False) + primary_key: Optional[Union[str, List[str], List[List[str]]]] + _primary_key: str = field(init=False, repr=False) + stream_cursor_field: Optional[List[str]] = None + transformations: List[RecordTransformation] = None + checkpoint_interval: Optional[int] = None + + def __post_init__(self, options: Mapping[str, Any]): + self.stream_cursor_field = self.stream_cursor_field or [] + self.transformations = self.transformations or [] @property def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: return self._primary_key + @primary_key.setter + def primary_key(self, value: str) -> None: + if not isinstance(value, property): + self._primary_key = value + @property def name(self) -> str: """ @@ -58,6 +63,11 @@ def name(self) -> str: """ return self._name + 
@name.setter + def name(self, value: str) -> None: + if not isinstance(value, property): + self._name = value + @property def state_checkpoint_interval(self) -> Optional[int]: """ @@ -70,16 +80,16 @@ def state_checkpoint_interval(self) -> Optional[int]: ascending order with respect to the cursor field. This can happen if the source does not support reading records in ascending order of created_at date (or whatever the cursor is). In those cases, state must only be saved once the full stream has been read. """ - return self._checkpoint_interval + return self.checkpoint_interval @property def state(self) -> MutableMapping[str, Any]: - return self._retriever.state + return self.retriever.state @state.setter def state(self, value: MutableMapping[str, Any]): """State setter, accept state serialized by state getter.""" - self._retriever.state = value + self.retriever.state = value def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]): return self.state @@ -90,7 +100,7 @@ def cursor_field(self) -> Union[str, List[str]]: Override to return the default cursor field used by this stream e.g: an API entity might always use created_at as the cursor field. :return: The name of the field used as a cursor. If the cursor is nested, return an array consisting of the path to the cursor. 
""" - return self._cursor_field + return self.stream_cursor_field def read_records( self, @@ -99,12 +109,12 @@ def read_records( stream_slice: Mapping[str, Any] = None, stream_state: Mapping[str, Any] = None, ) -> Iterable[Mapping[str, Any]]: - for record in self._retriever.read_records(sync_mode, cursor_field, stream_slice, stream_state): - yield self._apply_transformations(record, self._config, stream_slice) + for record in self.retriever.read_records(sync_mode, cursor_field, stream_slice, stream_state): + yield self._apply_transformations(record, self.config, stream_slice) def _apply_transformations(self, record: Mapping[str, Any], config: Config, stream_slice: StreamSlice): output_record = record - for transformation in self._transformations: + for transformation in self.transformations: output_record = transformation.transform(record, config=config, stream_state=self.state, stream_slice=stream_slice) return output_record @@ -116,7 +126,7 @@ def get_json_schema(self) -> Mapping[str, Any]: The default implementation of this method looks for a JSONSchema file with the same name as this stream's "name" property. Override as needed. 
""" - return self._schema_loader.get_json_schema() + return self.schema_loader.get_json_schema() def stream_slices( self, *, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None @@ -130,4 +140,4 @@ def stream_slices( :return: """ # this is not passing the cursor field because it is known at init time - return self._retriever.stream_slices(sync_mode=sync_mode, stream_state=stream_state) + return self.retriever.stream_slices(sync_mode=sync_mode, stream_state=stream_state) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/__init__.py index 46b7376756ec6..64a933247bdb5 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/__init__.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/__init__.py @@ -1,3 +1,8 @@ # -# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# + +from airbyte_cdk.sources.declarative.decoders.decoder import Decoder +from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder + +__all__ = ["Decoder", "JsonDecoder"] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/decoder.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/decoder.py index 39a9b91c7747b..5ec36516f4fd2 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/decoder.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/decoder.py @@ -3,11 +3,13 @@ # from abc import ABC, abstractmethod +from dataclasses import dataclass from typing import Any, List, Mapping, Union import requests +@dataclass class Decoder(ABC): """ Decoder strategy to transform a requests.Response into a Mapping[str, Any] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/json_decoder.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/json_decoder.py index 0e10d19805d82..0cea903656845 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/json_decoder.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/decoders/json_decoder.py @@ -2,16 +2,20 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # +from dataclasses import InitVar, dataclass from typing import Any, List, Mapping, Union import requests from airbyte_cdk.sources.declarative.decoders.decoder import Decoder +@dataclass class JsonDecoder(Decoder): """ Decoder strategy that returns the json-encoded content of a response, if any. 
""" + options: InitVar[Mapping[str, Any]] + def decode(self, response: requests.Response) -> Union[Mapping[str, Any], List]: return response.json() or {} diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/__init__.py index 1100c1c58cf51..897f382ea0de2 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/__init__.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/__init__.py @@ -1,3 +1,10 @@ # # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # + +from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector +from airbyte_cdk.sources.declarative.extractors.jello import JelloExtractor +from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter +from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector + +__all__ = ["HttpSelector", "JelloExtractor", "RecordFilter", "RecordSelector"] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_selector.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_selector.py index dd02da0b42d95..517f61c70b799 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_selector.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_selector.py @@ -3,12 +3,14 @@ # from abc import ABC, abstractmethod +from dataclasses import dataclass from typing import Any, List, Mapping, Optional import requests from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState +@dataclass class HttpSelector(ABC): """ Responsible for translating an HTTP response into a list of records by extracting records from the response and optionally filtering diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/jello.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/jello.py index 250d712d26f6d..f36613e2a56e7 
100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/jello.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/jello.py @@ -2,40 +2,42 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # -from typing import List, Union +from dataclasses import InitVar, dataclass +from typing import Any, List, Mapping, Union import requests from airbyte_cdk.sources.declarative.decoders.decoder import Decoder from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.types import Config, Record +from dataclasses_jsonschema import JsonSchemaMixin from jello import lib as jello_lib -class JelloExtractor: +@dataclass +class JelloExtractor(JsonSchemaMixin): """ Record extractor that evaluates a Jello query to extract records from a decoded response. More information on Jello can be found at https://github.com/kellyjonbrazil/jello + + Attributes: + transform (Union[InterpolatedString, str]): The Jello query to evaluate on the decoded response + config (Config): The user-provided configuration as specified by the source's spec + decoder (Decoder): The decoder responsible to transfom the response in a Mapping """ default_transform = "_" + transform: Union[InterpolatedString, str] + config: Config + options: InitVar[Mapping[str, Any]] + decoder: Decoder = JsonDecoder(options={}) - def __init__(self, transform: Union[InterpolatedString, str], config: Config, decoder: Decoder = JsonDecoder()): - """ - :param transform: The Jello query to evaluate on the decoded response - :param config: The user-provided configuration as specified by the source's spec - :param decoder: The decoder responsible to transfom the response in a Mapping - """ - - if isinstance(transform, str): - transform = InterpolatedString(transform, default=self.default_transform) - - self._transform = transform - self._decoder = 
decoder - self._config = config + def __post_init__(self, options: Mapping[str, Any]): + if isinstance(self.transform, str): + self.transform = InterpolatedString(string=self.transform, default=self.default_transform, options=options or {}) def extract_records(self, response: requests.Response) -> List[Record]: - response_body = self._decoder.decode(response) - script = self._transform.eval(self._config) + response_body = self.decoder.decode(response) + script = self.transform.eval(self.config) return jello_lib.pyquery(response_body, script) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_filter.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_filter.py index 8f0b123ff8951..081dd75971300 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_filter.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_filter.py @@ -2,26 +2,29 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # +from dataclasses import InitVar, dataclass, field from typing import Any, List, Mapping, Optional from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState +from dataclasses_jsonschema import JsonSchemaMixin -class RecordFilter: +@dataclass +class RecordFilter(JsonSchemaMixin): """ Filter applied on a list of Records + + config (Config): The user-provided configuration as specified by the source's spec + condition (str): The string representing the predicate to filter a record. Records will be removed if evaluated to False """ - def __init__(self, config: Config, condition: str = "", **options: Optional[Mapping[str, Any]]): - """ - :param config: The user-provided configuration as specified by the source's spec - :param condition: The string representing the predicate to filter a record. 
Records will be removed if evaluated to False - :param options: Additional runtime parameters to be used for string interpolation - """ - self._config = config - self._filter_interpolator = InterpolatedBoolean(condition) - self._options = options + options: InitVar[Mapping[str, Any]] + config: Config = field(default=dict) + condition: str = "" + + def __post_init__(self, options: Mapping[str, Any]): + self._filter_interpolator = InterpolatedBoolean(condition=self.condition, options=options) def filter_records( self, @@ -31,4 +34,4 @@ def filter_records( next_page_token: Optional[Mapping[str, Any]] = None, ) -> List[Record]: kwargs = {"stream_state": stream_state, "stream_slice": stream_slice, "next_page_token": next_page_token} - return [record for record in records if self._filter_interpolator.eval(self._config, record=record, **kwargs)] + return [record for record in records if self._filter_interpolator.eval(self.config, record=record, **kwargs)] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_selector.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_selector.py index 193f0e7576eba..8f27fb125b9cc 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_selector.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_selector.py @@ -2,6 +2,7 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# +from dataclasses import InitVar, dataclass from typing import Any, List, Mapping, Optional import requests @@ -9,22 +10,25 @@ from airbyte_cdk.sources.declarative.extractors.jello import JelloExtractor from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState +from dataclasses_jsonschema import JsonSchemaMixin -class RecordSelector(HttpSelector): +@dataclass +class RecordSelector(HttpSelector, JsonSchemaMixin): """ Responsible for translating an HTTP response into a list of records by extracting records from the response and optionally filtering records based on a heuristic. + + Attributes: + extractor (JelloExtractor): The record extractor responsible for extracting records from a response + record_filter (RecordFilter): The record filter responsible for filtering extracted records """ - def __init__(self, extractor: JelloExtractor, record_filter: RecordFilter = None, **options: Optional[Mapping[str, Any]]): - """ - :param extractor: The record extractor responsible for extracting records from a response - :param record_filter: The record filter responsible for filtering extracted records - :param options: Additional runtime parameters to be used for string interpolation - """ - self._extractor = extractor - self._record_filter = record_filter + extractor: JelloExtractor + options: InitVar[Mapping[str, Any]] + record_filter: RecordFilter = None + + def __post_init__(self, options: Mapping[str, Any]): self._options = options def select_records( @@ -34,9 +38,9 @@ def select_records( stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None, ) -> List[Record]: - all_records = self._extractor.extract_records(response) - if self._record_filter: - return self._record_filter.filter_records( + all_records = self.extractor.extract_records(response) + if self.record_filter: + return self.record_filter.filter_records( 
all_records, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token ) return all_records diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/__init__.py index 46b7376756ec6..1f1b53a1910ac 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/__init__.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/__init__.py @@ -1,3 +1,9 @@ # -# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. # + +from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean +from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping +from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString + +__all__ = ["InterpolatedBoolean", "InterpolatedMapping", "InterpolatedString"] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py index 8eff06e3bd8a2..f7979dd69e46f 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py @@ -2,26 +2,29 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# -from typing import Any, Final, List, Mapping, Optional +from dataclasses import InitVar, dataclass +from typing import Any, Final, List, Mapping from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation from airbyte_cdk.sources.declarative.types import Config +from dataclasses_jsonschema import JsonSchemaMixin FALSE_VALUES: Final[List[Any]] = ["False", "false", "{}", "[]", "()", "", "0", "0.0", "False", "false", {}, False, [], (), set()] -class InterpolatedBoolean: +@dataclass +class InterpolatedBoolean(JsonSchemaMixin): f""" Wrapper around a string to be evaluated to a boolean value. The string will be evaluated as False if it interpolates to a value in {FALSE_VALUES} + + Attributes: + condition (str): The string representing the condition to evaluate to a boolean """ + condition: str + options: InitVar[Mapping[str, Any]] - def __init__(self, condition: str, **options: Optional[Mapping[str, Any]]): - """ - :param condition: The string representing the condition to evaluate to a boolean - :param options: Additional runtime parameters to be used for string interpolation - """ - self._condition = condition + def __post_init__(self, options: Mapping[str, Any]): self._default = "False" self._interpolation = JinjaInterpolation() self._options = options @@ -34,10 +37,10 @@ def eval(self, config: Config, **additional_options): :param additional_options: Optional parameters used for interpolation :return: The interpolated string """ - if isinstance(self._condition, bool): - return self._condition + if isinstance(self.condition, bool): + return self.condition else: - evaluated = self._interpolation.eval(self._condition, config, self._default, options=self._options, **additional_options) + evaluated = self._interpolation.eval(self.condition, config, self._default, options=self._options, **additional_options) if evaluated in FALSE_VALUES: return False # The presence of a value is generally regarded as truthy, so we treat it as such diff --git 
a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py index fc46cf4f8552b..6c1f80886a529 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py @@ -2,23 +2,30 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # -from typing import Any, Mapping + +from dataclasses import InitVar, dataclass +from typing import Any, Mapping, Optional from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation from airbyte_cdk.sources.declarative.types import Config +from dataclasses_jsonschema import JsonSchemaMixin -class InterpolatedMapping: - """Wrapper around a Mapping[str, str] where both the keys and values are to be interpolated.""" +@dataclass +class InterpolatedMapping(JsonSchemaMixin): + """ + Wrapper around a Mapping[str, str] where both the keys and values are to be interpolated. 
- def __init__(self, mapping: Mapping[str, Any], options: Mapping[str, Any]): - """ - :param mapping: Mapping[str, str] to be evaluated - :param options: Additional runtime parameters to be used for string interpolation - """ - self._mapping = mapping - self._options = options + Attributes: + mapping (Mapping[str, str]): to be evaluated + """ + + mapping: Mapping[str, str] + options: InitVar[Mapping[str, Any]] + + def __post_init__(self, options: Optional[Mapping[str, Any]]): self._interpolation = JinjaInterpolation() + self._options = options def eval(self, config: Config, **additional_options): """ @@ -32,7 +39,7 @@ def eval(self, config: Config, **additional_options): self._interpolation.eval(name, config, options=self._options, **additional_options): self._eval( value, config, **additional_options ) - for name, value in self._mapping.items() + for name, value in self.mapping.items() } return interpolated_values diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/interpolated_string.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/interpolated_string.py index 3c2171a7d9d2f..145be0d949d0e 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/interpolated_string.py @@ -2,27 +2,33 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# +from dataclasses import InitVar, dataclass from typing import Any, Mapping, Optional, Union from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation from airbyte_cdk.sources.declarative.types import Config +from dataclasses_jsonschema import JsonSchemaMixin -class InterpolatedString: +@dataclass +class InterpolatedString(JsonSchemaMixin): """ Wrapper around a raw string to be interpolated with the Jinja2 templating engine + + Attributes: + string (str): The string to evalute + default (Optional[str]): The default value to return if the evaluation returns an empty string + options (Mapping[str, Any]): Additional runtime parameters to be used for string interpolation """ - def __init__(self, string: str, *, options: Mapping[str, Any] = {}, default: Optional[str] = None): - """ - :param string: The string to evalute - :param default: The default value to return if the evaluation returns an empty string - :param options: Additional runtime parameters to be used for string interpolation - """ - self._string = string - self._default = default or string + string: str + options: InitVar[Mapping[str, Any]] + default: Optional[str] = None + + def __post_init__(self, options: Mapping[str, Any]): + self.default = self.default or self.string self._interpolation = JinjaInterpolation() - self._options = options or {} + self._options = options def eval(self, config: Config, **kwargs): """ @@ -32,12 +38,12 @@ def eval(self, config: Config, **kwargs): :param kwargs: Optional parameters used for interpolation :return: The interpolated string """ - return self._interpolation.eval(self._string, config, self._default, options=self._options, **kwargs) + return self._interpolation.eval(self.string, config, self.default, options=self._options, **kwargs) def __eq__(self, other): if not isinstance(other, InterpolatedString): return False - return self._string == other._string and self._default == other._default + return self.string == other.string and 
self.default == other.default @classmethod def create( @@ -54,6 +60,6 @@ def create( :return: InterpolatedString representing the input string. """ if isinstance(string_or_interpolated, str): - return InterpolatedString(string_or_interpolated, options=options) + return InterpolatedString(string=string_or_interpolated, options=options) else: return string_or_interpolated diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/__init__.py index 46b7376756ec6..ca8377e6fc979 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/__init__.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/__init__.py @@ -1,3 +1,9 @@ # -# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. # + +from airbyte_cdk.sources.declarative.requesters.http_requester import HttpRequester +from airbyte_cdk.sources.declarative.requesters.request_option import RequestOption +from airbyte_cdk.sources.declarative.requesters.requester import Requester + +__all__ = ["HttpRequester", "RequestOption", "Requester"] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py index 1100c1c58cf51..f2602eea94b53 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py @@ -1,3 +1,11 @@ # # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# + +from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategy import BackoffStrategy +from airbyte_cdk.sources.declarative.requesters.error_handlers.composite_error_handler import CompositeErrorHandler +from airbyte_cdk.sources.declarative.requesters.error_handlers.default_error_handler import DefaultErrorHandler +from airbyte_cdk.sources.declarative.requesters.error_handlers.error_handler import ErrorHandler +from airbyte_cdk.sources.declarative.requesters.error_handlers.http_response_filter import HttpResponseFilter + +__all__ = ["BackoffStrategy", "CompositeErrorHandler", "DefaultErrorHandler", "ErrorHandler", "HttpResponseFilter"] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py index 1100c1c58cf51..15472c2bd76a9 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py @@ -1,3 +1,21 @@ # # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# + +from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.constant_backoff_strategy import ConstantBackoffStrategy +from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.exponential_backoff_strategy import ( + ExponentialBackoffStrategy, +) +from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.wait_time_from_header_backoff_strategy import ( + WaitTimeFromHeaderBackoffStrategy, +) +from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.wait_until_time_from_header_backoff_strategy import ( + WaitUntilTimeFromHeaderBackoffStrategy, +) + +__all__ = [ + "ConstantBackoffStrategy", + "ExponentialBackoffStrategy", + "WaitTimeFromHeaderBackoffStrategy", + "WaitUntilTimeFromHeaderBackoffStrategy", +] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py index d100dff0d2e4d..3a7df2dc7b365 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py @@ -2,22 +2,24 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # +from dataclasses import dataclass from typing import Optional import requests from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategy import BackoffStrategy +from dataclasses_jsonschema import JsonSchemaMixin -class ConstantBackoffStrategy(BackoffStrategy): +@dataclass +class ConstantBackoffStrategy(BackoffStrategy, JsonSchemaMixin): """ Backoff strategy with a constant backoff interval + + Attributes: + backoff_time_in_seconds (float): time to backoff before retrying a retryable request. 
""" - def __init__(self, backoff_time_in_seconds: float): - """ - :param backoff_time_in_seconds: time to backoff before retrying a retryable request - """ - self._backoff_time_in_seconds = backoff_time_in_seconds + backoff_time_in_seconds: float def backoff(self, response: requests.Response, attempt_count: int) -> Optional[float]: - return self._backoff_time_in_seconds + return self.backoff_time_in_seconds diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py index 71c24ff3ea525..75a52ffca3756 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py @@ -2,22 +2,24 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# +from dataclasses import dataclass from typing import Optional import requests from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategy import BackoffStrategy +from dataclasses_jsonschema import JsonSchemaMixin -class ExponentialBackoffStrategy(BackoffStrategy): +@dataclass +class ExponentialBackoffStrategy(BackoffStrategy, JsonSchemaMixin): """ Backoff strategy with an exponential backoff interval + + Attributes: + factor (float): multiplicative factor """ - def __init__(self, factor: float = 5): - """ - :param factor: multiplicative factor - """ - self._factor = factor + factor: float = 5 def backoff(self, response: requests.Response, attempt_count: int) -> Optional[float]: - return self._factor * 2**attempt_count + return self.factor * 2**attempt_count diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py index 7a30053554c66..3ff279c78eb5c 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py @@ -3,26 +3,31 @@ # import re +from dataclasses import dataclass from typing import Optional import requests from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.header_helper import get_numeric_value_from_header from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategy import BackoffStrategy +from dataclasses_jsonschema import JsonSchemaMixin -class WaitTimeFromHeaderBackoffStrategy(BackoffStrategy): +@dataclass +class WaitTimeFromHeaderBackoffStrategy(BackoffStrategy, JsonSchemaMixin): """ Extract wait time from http header + + 
Attributes: + header (str): header to read wait time from + regex (Optional[str]): optional regex to apply on the header to extract its value """ - def __init__(self, header: str, regex: Optional[str] = None): - """ - :param header: header to read wait time from - :param regex: optional regex to apply on the header to extract its value - """ - self._header = header - self._regex = re.compile(regex) if regex else None + header: str + regex: Optional[str] = None + + def __post_init__(self): + self.regex = re.compile(self.regex) if self.regex else None def backoff(self, response: requests.Response, attempt_count: int) -> Optional[float]: - header_value = get_numeric_value_from_header(response, self._header, self._regex) + header_value = get_numeric_value_from_header(response, self.header, self.regex) return header_value diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py index a406bd5d05832..0e56741035ba6 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py @@ -5,41 +5,45 @@ import numbers import re import time +from dataclasses import dataclass from typing import Optional import requests from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.header_helper import get_numeric_value_from_header from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategy import BackoffStrategy +from dataclasses_jsonschema import JsonSchemaMixin -class WaitUntilTimeFromHeaderBackoffStrategy(BackoffStrategy): +@dataclass +class 
WaitUntilTimeFromHeaderBackoffStrategy(BackoffStrategy, JsonSchemaMixin): """ Extract time at which we can retry the request from response header and wait for the difference between now and that time + + Attributes: + header (str): header to read wait time from + min_wait (Optional[float]): minimum time to wait for safety + regex (Optional[str]): optional regex to apply on the header to extract its value """ - def __init__(self, header: str, min_wait: Optional[float] = None, regex: Optional[str] = None): - """ + header: str + min_wait: Optional[float] = None + regex: Optional[str] = None - :param header: header to read wait time from - :param min_wait: minimum time to wait for safety - :param regex: optional regex to apply on the header to extract its value - """ - self._header = header - self._min_wait = min_wait - self._regex = re.compile(regex) if regex else None + def __post_init__(self): + self.regex = re.compile(self.regex) if self.regex else None def backoff(self, response: requests.Response, attempt_count: int) -> Optional[float]: now = time.time() - wait_until = get_numeric_value_from_header(response, self._header, self._regex) + wait_until = get_numeric_value_from_header(response, self.header, self.regex) if wait_until is None or not wait_until: - return self._min_wait + return self.min_wait if (isinstance(wait_until, str) and wait_until.isnumeric()) or isinstance(wait_until, numbers.Number): wait_time = float(wait_until) - now else: - return self._min_wait - if self._min_wait: - return max(wait_time, self._min_wait) + return self.min_wait + if self.min_wait: + return max(wait_time, self.min_wait) elif wait_time < 0: return None return wait_time diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py index 55723e41f9154..00c1b6dff23b6 100644 --- 
a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py @@ -3,11 +3,13 @@ # from abc import abstractmethod +from dataclasses import dataclass from typing import Optional import requests +@dataclass class BackoffStrategy: """ Backoff strategy defining how long to wait before retrying a request that resulted in an error. diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py index 906b16c3d81e1..0c2cfe5da878f 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py @@ -2,16 +2,19 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# -from typing import List, Union +from dataclasses import InitVar, dataclass +from typing import Any, List, Mapping, Union import airbyte_cdk.sources.declarative.requesters.error_handlers.response_status as response_status import requests from airbyte_cdk.sources.declarative.requesters.error_handlers.error_handler import ErrorHandler from airbyte_cdk.sources.declarative.requesters.error_handlers.response_action import ResponseAction from airbyte_cdk.sources.declarative.requesters.error_handlers.response_status import ResponseStatus +from dataclasses_jsonschema import JsonSchemaMixin -class CompositeErrorHandler(ErrorHandler): +@dataclass +class CompositeErrorHandler(ErrorHandler, JsonSchemaMixin): """ Error handler that sequentially iterates over a list of `ErrorHandler`s @@ -31,23 +34,24 @@ class CompositeErrorHandler(ErrorHandler): backoff_strategies: - type: "ConstantBackoffStrategy" backoff_time_in_seconds: 10 + Attributes: + error_handlers (List[ErrorHandler]): list of error handlers """ - def __init__(self, error_handlers: List[ErrorHandler]): - """ - :param error_handlers: list of error handlers - """ - self._error_handlers = error_handlers - if not self._error_handlers: + error_handlers: List[ErrorHandler] + options: InitVar[Mapping[str, Any]] + + def __post_init__(self, options: Mapping[str, Any]): + if not self.error_handlers: raise ValueError("CompositeErrorHandler expects at least 1 underlying error handler") @property def max_retries(self) -> Union[int, None]: - return self._error_handlers[0].max_retries + return self.error_handlers[0].max_retries def should_retry(self, response: requests.Response) -> ResponseStatus: should_retry = None - for retrier in self._error_handlers: + for retrier in self.error_handlers: should_retry = retrier.should_retry(response) if should_retry.action == ResponseAction.SUCCESS: return response_status.SUCCESS diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py 
b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py index 5bcda8231c0f9..179db638661f5 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py @@ -2,7 +2,8 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # -from typing import List, MutableMapping, Optional, Union +from dataclasses import InitVar, dataclass, field +from typing import Any, List, Mapping, MutableMapping, Optional, Union import airbyte_cdk.sources.declarative.requesters.error_handlers.response_status as response_status import requests @@ -14,9 +15,11 @@ from airbyte_cdk.sources.declarative.requesters.error_handlers.http_response_filter import HttpResponseFilter from airbyte_cdk.sources.declarative.requesters.error_handlers.response_action import ResponseAction from airbyte_cdk.sources.declarative.requesters.error_handlers.response_status import ResponseStatus +from dataclasses_jsonschema import JsonSchemaMixin -class DefaultErrorHandler(ErrorHandler): +@dataclass +class DefaultErrorHandler(ErrorHandler, JsonSchemaMixin): """ Default error handler. 
@@ -77,32 +80,33 @@ class DefaultErrorHandler(ErrorHandler): - http_codes: [ 404 ] action: RETRY ` + + Attributes: + response_filters (Optional[List[HttpResponseFilter]]): response filters to iterate on + max_retries (Optional[int]): maximum retry attempts + backoff_strategies (Optional[List[BackoffStrategy]]): list of backoff strategies to use to determine how long + to wait before retrying """ DEFAULT_BACKOFF_STRATEGY = ExponentialBackoffStrategy - def __init__( - self, - response_filters: Optional[List[HttpResponseFilter]] = None, - max_retries: Optional[int] = 5, - backoff_strategies: Optional[List[BackoffStrategy]] = None, - ): - """ - :param response_filters: response filters to iterate on - :param max_retries: maximum retry attemps - :param backoff_strategies: list of backoff strategies to use to determine how long to wait before retrying - """ - self._max_retries = max_retries - self._response_filters = response_filters or [] - - if not response_filters: - self._response_filters.append(HttpResponseFilter(ResponseAction.RETRY, http_codes=HttpResponseFilter.DEFAULT_RETRIABLE_ERRORS)) - self._response_filters.append(HttpResponseFilter(ResponseAction.IGNORE)) - - if backoff_strategies: - self._backoff_strategies = backoff_strategies - else: - self._backoff_strategies = [DefaultErrorHandler.DEFAULT_BACKOFF_STRATEGY()] + options: InitVar[Mapping[str, Any]] + response_filters: Optional[List[HttpResponseFilter]] = None + max_retries: Optional[int] = 5 + _max_retries: int = field(init=False, repr=False, default=5) + backoff_strategies: Optional[List[BackoffStrategy]] = None + + def __post_init__(self, options: Mapping[str, Any]): + self.response_filters = self.response_filters or [] + + if not self.response_filters: + self.response_filters.append( + HttpResponseFilter(ResponseAction.RETRY, http_codes=HttpResponseFilter.DEFAULT_RETRIABLE_ERRORS, options={}) + ) + self.response_filters.append(HttpResponseFilter(ResponseAction.IGNORE, options={})) + + if not 
self.backoff_strategies: + self.backoff_strategies = [DefaultErrorHandler.DEFAULT_BACKOFF_STRATEGY()] self._last_request_to_attempt_count: MutableMapping[requests.PreparedRequest, int] = {} @@ -110,6 +114,13 @@ def __init__( def max_retries(self) -> Union[int, None]: return self._max_retries + @max_retries.setter + def max_retries(self, value: Union[int, None]): + # Covers the case where max_retries is not provided in the constructor, which causes the property object + # to be set which we need to avoid doing + if not isinstance(value, property): + self._max_retries = value + def should_retry(self, response: requests.Response) -> ResponseStatus: request = response.request @@ -117,7 +128,7 @@ def should_retry(self, response: requests.Response) -> ResponseStatus: self._last_request_to_attempt_count = {request: 1} else: self._last_request_to_attempt_count[request] += 1 - for response_filter in self._response_filters: + for response_filter in self.response_filters: filter_action = response_filter.matches(response) if filter_action is not None: if filter_action == ResponseAction.RETRY: @@ -131,7 +142,7 @@ def should_retry(self, response: requests.Response) -> ResponseStatus: def _backoff_time(self, response: requests.Response, attempt_count: int) -> Optional[float]: backoff = None - for backoff_strategies in self._backoff_strategies: + for backoff_strategies in self.backoff_strategies: backoff = backoff_strategies.backoff(response, attempt_count) if backoff: return backoff diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py index f42e4ef9401af..50b6412ad350e 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py @@ -3,12 +3,14 @@ # from abc import ABC, abstractmethod +from 
dataclasses import dataclass from typing import Union import requests from airbyte_cdk.sources.declarative.requesters.error_handlers.response_status import ResponseStatus +@dataclass class ErrorHandler(ABC): """ Defines whether a request was successful and how to handle a failure. diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py index 69790dad9f912..2da7f6272f218 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py @@ -2,42 +2,43 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # -from typing import Optional, Set, Union +from dataclasses import InitVar, dataclass +from typing import Any, Mapping, Optional, Set, Union import requests from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean from airbyte_cdk.sources.declarative.requesters.error_handlers.response_action import ResponseAction from airbyte_cdk.sources.streams.http.http import HttpStream +from dataclasses_jsonschema import JsonSchemaMixin -class HttpResponseFilter: +@dataclass +class HttpResponseFilter(JsonSchemaMixin): """ Filter to select HttpResponses + + Attributes: + action (Union[ResponseAction, str]): action to execute if a request matches + http_codes (Set[int]): http code of matching requests + error_message_contains (str): error substring of matching requests + predicate (str): predicate to apply to determine if a request is matching """ TOO_MANY_REQUESTS_ERRORS = {429} DEFAULT_RETRIABLE_ERRORS = set([x for x in range(500, 600)]).union(TOO_MANY_REQUESTS_ERRORS) - def __init__( - self, action: Union[ResponseAction, str], *, http_codes: Set[int] = None, error_message_contain: str = None, predicate: str = "" - ): - 
""" - :param action: action to execute if a request matches - :param http_codes: http code of matching requests - :param error_message_contain: error substring of matching requests - :param predicate: predicate to apply to determine if a request is matching - """ - if isinstance(action, str): - action = ResponseAction[action] - self._http_codes = http_codes or set() - self._predicate = InterpolatedBoolean(predicate) - self._error_message_contains = error_message_contain - self._action = action + action: Union[ResponseAction, str] + options: InitVar[Mapping[str, Any]] + http_codes: Set[int] = None + error_message_contains: str = None + predicate: Union[InterpolatedBoolean, str] = "" - @property - def action(self) -> ResponseAction: - """The ResponseAction to execute when a response matches the filter""" - return self._action + def __post_init__(self, options: Mapping[str, Any]): + if isinstance(self.action, str): + self.action = ResponseAction[self.action] + self.http_codes = self.http_codes or set() + if isinstance(self.predicate, str): + self.predicate = InterpolatedBoolean(condition=self.predicate, options=options) def matches(self, response: requests.Response) -> Optional[ResponseAction]: """ @@ -46,20 +47,20 @@ def matches(self, response: requests.Response) -> Optional[ResponseAction]: :return: The action to execute. 
None if the response does not match the filter """ if ( - response.status_code in self._http_codes + response.status_code in self.http_codes or (self._response_matches_predicate(response)) or (self._response_contains_error_message(response)) ): - return self._action + return self.action else: return None def _response_matches_predicate(self, response: requests.Response) -> bool: - return self._predicate and self._predicate.eval(None, response=response.json()) + return self.predicate and self.predicate.eval(None, response=response.json()) def _response_contains_error_message(self, response: requests.Response) -> bool: - if not self._error_message_contains: + if not self.error_message_contains: return False else: error_message = HttpStream.parse_response_error_message(response) - return error_message and self._error_message_contains in error_message + return error_message and self.error_message_contains in error_message diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py index 651b58186e5d4..4658e66c704f3 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -2,6 +2,7 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# +from dataclasses import InitVar, dataclass from functools import lru_cache from typing import Any, Mapping, MutableMapping, Optional, Union @@ -17,64 +18,66 @@ from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod, Requester from airbyte_cdk.sources.declarative.types import Config, StreamSlice, StreamState from airbyte_cdk.sources.streams.http.auth import HttpAuthenticator, NoAuth +from dataclasses_jsonschema import JsonSchemaMixin -class HttpRequester(Requester): +@dataclass +class HttpRequester(Requester, JsonSchemaMixin): """ Default implementation of a Requester + + Attributes: + name (str): Name of the stream. Only used for request/response caching + url_base (InterpolatedString): Base url to send requests to + path (InterpolatedString): Path to send requests to + http_method (Union[str, HttpMethod]): HTTP method to use when sending requests + request_options_provider (Optional[RequestOptionsProvider]): request option provider defining the options to set on outgoing requests + authenticator (HttpAuthenticator): Authenticator defining how to authenticate to the source + error_handler (Optional[ErrorHandler]): Error handler defining how to detect and handle errors + config (Config): The user-provided configuration as specified by the source's spec """ - def __init__( - self, - *, - name: str, - url_base: InterpolatedString, - path: InterpolatedString, - http_method: Union[str, HttpMethod] = HttpMethod.GET, - request_options_provider: Optional[RequestOptionsProvider] = None, - authenticator: HttpAuthenticator = None, - error_handler: Optional[ErrorHandler] = None, - config: Config, - **options: Optional[Mapping[str, Any]], - ): - """ - :param name: Name of the stream. 
Only used for request/response caching - :param url_base: Base url to send requests to - :param path: Path to send requests to - :param http_method: HTTP method to use when sending requests - :param request_options_provider: request option provider defining the options to set on outgoing requests - :param authenticator: Authenticator defining how to authenticate to the source - :param error_handler: Error handler defining how to detect and handle errors - :param config: The user-provided configuration as specified by the source's spec - :param options: Additional runtime parameters to be used for string interpolation - """ - if request_options_provider is None: - request_options_provider = InterpolatedRequestOptionsProvider(config=config) - elif isinstance(request_options_provider, dict): - request_options_provider = InterpolatedRequestOptionsProvider(config=config, **request_options_provider) - self._name = name - self._authenticator = authenticator or NoAuth() - self._url_base = url_base - self._path: InterpolatedString = path - if type(http_method) == str: - http_method = HttpMethod[http_method] - self._method = http_method - self._request_options_provider = request_options_provider - self._error_handler = error_handler or DefaultErrorHandler() - self._config = config + name: str + url_base: InterpolatedString + path: InterpolatedString + config: Config + options: InitVar[Mapping[str, Any]] + http_method: Union[str, HttpMethod] = HttpMethod.GET + request_options_provider: Optional[RequestOptionsProvider] = None + authenticator: HttpAuthenticator = None + error_handler: Optional[ErrorHandler] = None + + def __post_init__(self, options: Mapping[str, Any]): + if self.request_options_provider is None: + self._request_options_provider = InterpolatedRequestOptionsProvider(config=self.config, options=options) + elif isinstance(self.request_options_provider, dict): + self._request_options_provider = InterpolatedRequestOptionsProvider(config=self.config, 
**self.request_options_provider) + else: + self._request_options_provider = self.request_options_provider + self.authenticator = self.authenticator or NoAuth() + if type(self.http_method) == str: + self.http_method = HttpMethod[self.http_method] + self._method = self.http_method + self.error_handler = self.error_handler or DefaultErrorHandler(options=options) self._options = options + # We are using an LRU cache in should_retry() method which requires all incoming arguments (including self) to be hashable. + # Dataclasses by default are not hashable, so we need to define __hash__(). Alternatively, we can set @dataclass(frozen=True), + # but this has a cascading effect where all dataclass fields must also be set to frozen. + def __hash__(self): + return hash(tuple(self.__dict__)) + def get_authenticator(self): - return self._authenticator + return self.authenticator def get_url_base(self): - return self._url_base.eval(self._config) + return self.url_base.eval(self.config) def get_path( self, *, stream_state: Optional[StreamState], stream_slice: Optional[StreamSlice], next_page_token: Optional[Mapping[str, Any]] ) -> str: kwargs = {"stream_state": stream_state, "stream_slice": stream_slice, "next_page_token": next_page_token} - path = self._path.eval(self._config, **kwargs) + path = self.path.eval(self.config, **kwargs) return path def get_method(self): @@ -85,49 +88,49 @@ def get_method(self): @lru_cache(maxsize=10) def should_retry(self, response: requests.Response) -> ResponseStatus: # Cache the result because the HttpStream first checks if we should retry before looking at the backoff time - return self._error_handler.should_retry(response) + return self.error_handler.should_retry(response) - def request_params( + def get_request_params( self, *, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None, ) -> MutableMapping[str, Any]: - return 
self._request_options_provider.request_params( + return self._request_options_provider.get_request_params( stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token ) - def request_headers( + def get_request_headers( self, *, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None, ) -> Mapping[str, Any]: - return self._request_options_provider.request_headers( + return self._request_options_provider.get_request_headers( stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token ) - def request_body_data( + def get_request_body_data( self, *, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None, ) -> Optional[Union[Mapping, str]]: - return self._request_options_provider.request_body_data( + return self._request_options_provider.get_request_body_data( stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token ) - def request_body_json( + def get_request_body_json( self, *, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None, ) -> Optional[Mapping]: - return self._request_options_provider.request_body_json( + return self._request_options_provider.get_request_body_json( stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token ) @@ -145,7 +148,7 @@ def request_kwargs( @property def cache_filename(self) -> str: # FIXME: this should be declarative - return f"{self._name}.yml" + return f"{self.name}.yml" @property def use_cache(self) -> bool: diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/__init__.py index 46b7376756ec6..d0310b21c199c 100644 --- 
a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/__init__.py @@ -1,3 +1,10 @@ # -# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. # + +from airbyte_cdk.sources.declarative.requesters.paginators.limit_paginator import LimitPaginator +from airbyte_cdk.sources.declarative.requesters.paginators.no_pagination import NoPagination +from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator +from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import PaginationStrategy + +__all__ = ["LimitPaginator", "NoPagination", "PaginationStrategy", "Paginator"] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/limit_paginator.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/limit_paginator.py index 6d34cdd5a32df..675270f0da879 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/limit_paginator.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/limit_paginator.py @@ -2,19 +2,22 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# -from typing import Any, List, Mapping, Optional +from dataclasses import InitVar, dataclass, field +from typing import Any, List, Mapping, Optional, Union import requests from airbyte_cdk.sources.declarative.decoders.decoder import Decoder from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString -from airbyte_cdk.sources.declarative.requesters.paginators.pagination_strategy import PaginationStrategy from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator +from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import PaginationStrategy from airbyte_cdk.sources.declarative.requesters.request_option import RequestOption, RequestOptionType from airbyte_cdk.sources.declarative.types import Config, StreamSlice, StreamState +from dataclasses_jsonschema import JsonSchemaMixin -class LimitPaginator(Paginator): +@dataclass +class LimitPaginator(Paginator, JsonSchemaMixin): """ Limit paginator to request pages of results with a fixed size until the pagination strategy no longer returns a next_page_token @@ -24,9 +27,9 @@ class LimitPaginator(Paginator): * updates the request path with "{{ response._metadata.next }}" paginator: type: "LimitPaginator" - limit_value: 10 + page_size: 10 limit_option: - option_type: request_parameter + inject_into: request_parameter field_name: page_size page_token_option: option_type: path @@ -41,9 +44,9 @@ class LimitPaginator(Paginator): ` paginator: type: "LimitPaginator" - limit_value: 5 + page_size: 5 limit_option: - option_type: header + inject_into: header field_name: page_size pagination_strategy: type: "OffsetIncrement" @@ -58,66 +61,57 @@ class LimitPaginator(Paginator): ` paginator: type: "LimitPaginator" - limit_value: 5 + page_size: 5 limit_option: - option_type: request_parameter + inject_into: request_parameter field_name: page_size 
pagination_strategy: type: "PageIncrement" page_token: option_type: "request_parameter" field_name: "page" + + Attributes: + page_size (int): the number of records to request + limit_option (RequestOption): the request option to set the limit. Cannot be injected in the path. + page_token_option (RequestOption): the request option to set the page token + pagination_strategy (PaginationStrategy): Strategy defining how to get the next page token + config (Config): connection config + url_base (Union[InterpolatedString, str]): endpoint's base url + decoder (Decoder): decoder to decode the response """ - def __init__( - self, - page_size: int, - limit_option: RequestOption, - page_token_option: RequestOption, - pagination_strategy: PaginationStrategy, - config: Config, - url_base: str, - decoder: Decoder = None, - **options: Optional[Mapping[str, Any]], - ): - """ - :param page_size: The number of records to request - :param limit_option: The request option to set the limit. Cannot be injected in the path. 
- :param page_token_option: The request option to set the page token - :param pagination_strategy: The strategy defining how to get the next page token - :param config: The user-provided configuration as specified by the source's spec - :param url_base: The endpoint's base url - :param decoder: The decoder to decode the response - :param options: Additional runtime parameters to be used for string interpolation - """ - if limit_option.inject_into == RequestOptionType.path: + page_size: int + limit_option: RequestOption + page_token_option: RequestOption + pagination_strategy: PaginationStrategy + config: Config + url_base: Union[InterpolatedString, str] + options: InitVar[Mapping[str, Any]] + decoder: Decoder = JsonDecoder(options={}) + _token: Optional[Any] = field(init=False, repr=False, default=None) + + def __post_init__(self, options: Mapping[str, Any]): + if self.limit_option.inject_into == RequestOptionType.path: raise ValueError("Limit parameter cannot be a path") - self._page_size = page_size - self._config = config - self._limit_option = limit_option - self._page_token_option = page_token_option - self._pagination_strategy = pagination_strategy - self._token = None - if isinstance(url_base, str): - url_base = InterpolatedString.create(url_base, options=options) - self._url_base = url_base - self._decoder = decoder or JsonDecoder() + if isinstance(self.url_base, str): + self.url_base = InterpolatedString(string=self.url_base, options=options) def next_page_token(self, response: requests.Response, last_records: List[Mapping[str, Any]]) -> Optional[Mapping[str, Any]]: - self._token = self._pagination_strategy.next_page_token(response, last_records) + self._token = self.pagination_strategy.next_page_token(response, last_records) if self._token: return {"next_page_token": self._token} else: return None def path(self): - if self._token and self._page_token_option.inject_into == RequestOptionType.path: + if self._token and self.page_token_option.inject_into == 
RequestOptionType.path: # Replace url base to only return the path - return str(self._token).replace(self._url_base.eval(self._config), "") + return str(self._token).replace(self.url_base.eval(self.config), "") else: return None - def request_params( + def get_request_params( self, *, stream_state: Optional[StreamState] = None, @@ -126,7 +120,7 @@ def request_params( ) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.request_parameter) - def request_headers( + def get_request_headers( self, *, stream_state: Optional[StreamState] = None, @@ -135,7 +129,7 @@ def request_headers( ) -> Mapping[str, str]: return self._get_request_options(RequestOptionType.header) - def request_body_data( + def get_request_body_data( self, *, stream_state: Optional[StreamState] = None, @@ -144,7 +138,7 @@ def request_body_data( ) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.body_data) - def request_body_json( + def get_request_body_json( self, *, stream_state: Optional[StreamState] = None, @@ -155,10 +149,10 @@ def request_body_json( def _get_request_options(self, option_type: RequestOptionType) -> Mapping[str, Any]: options = {} - if self._page_token_option.inject_into == option_type: + if self.page_token_option.inject_into == option_type: if option_type != RequestOptionType.path and self._token: - options[self._page_token_option.field_name] = self._token - if self._limit_option.inject_into == option_type: + options[self.page_token_option.field_name] = self._token + if self.limit_option.inject_into == option_type: if option_type != RequestOptionType.path: - options[self._limit_option.field_name] = self._page_size + options[self.limit_option.field_name] = self.page_size return options diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py index 8877c829a7a29..ac54ba0bc70e5 100644 --- 
a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py @@ -2,6 +2,7 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # +from dataclasses import InitVar, dataclass from typing import Any, List, Mapping, Optional, Union import requests @@ -9,15 +10,18 @@ from airbyte_cdk.sources.declarative.types import StreamSlice, StreamState +@dataclass class NoPagination(Paginator): """ Pagination implementation that never returns a next page. """ + options: InitVar[Mapping[str, Any]] + def path(self) -> Optional[str]: return None - def request_params( + def get_request_params( self, *, stream_state: Optional[StreamState] = None, @@ -26,7 +30,7 @@ def request_params( ) -> Mapping[str, Any]: return {} - def request_headers( + def get_request_headers( self, *, stream_state: Optional[StreamState] = None, @@ -35,7 +39,7 @@ def request_headers( ) -> Mapping[str, str]: return {} - def request_body_data( + def get_request_body_data( self, *, stream_state: Optional[StreamState] = None, @@ -44,7 +48,7 @@ def request_body_data( ) -> Union[Mapping[str, Any], str]: return {} - def request_body_json( + def get_request_body_json( self, *, stream_state: Optional[StreamState] = None, diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py index 084d3b6a5e880..e77ca744b3ed2 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py @@ -3,12 +3,14 @@ # from abc import abstractmethod +from dataclasses import dataclass from typing import Any, List, Mapping, Optional import requests from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import RequestOptionsProvider 
+@dataclass class Paginator(RequestOptionsProvider): """ Defines the token to use to fetch the next page of records from the API. diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py index 1100c1c58cf51..4b4f9d259d9b7 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py @@ -1,3 +1,9 @@ # # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # + +from airbyte_cdk.sources.declarative.requesters.paginators.strategies.cursor_pagination_strategy import CursorPaginationStrategy +from airbyte_cdk.sources.declarative.requesters.paginators.strategies.offset_increment import OffsetIncrement +from airbyte_cdk.sources.declarative.requesters.paginators.strategies.page_increment import PageIncrement + +__all__ = ["CursorPaginationStrategy", "OffsetIncrement", "PageIncrement"] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py index f583a28d6011e..09d036580f8f2 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py @@ -2,6 +2,7 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# +from dataclasses import InitVar, dataclass from typing import Any, List, Mapping, Optional, Union import requests @@ -9,43 +10,39 @@ from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString -from airbyte_cdk.sources.declarative.requesters.paginators.pagination_strategy import PaginationStrategy +from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import PaginationStrategy from airbyte_cdk.sources.declarative.types import Config +from dataclasses_jsonschema import JsonSchemaMixin -class CursorPaginationStrategy(PaginationStrategy): +@dataclass +class CursorPaginationStrategy(PaginationStrategy, JsonSchemaMixin): """ Pagination strategy that evaluates an interpolated string to define the next page token + + Attributes: + cursor_value (Union[InterpolatedString, str]): template string evaluating to the cursor value + config (Config): connection config + stop_condition (Optional[InterpolatedBoolean]): template string evaluating when to stop paginating + decoder (Decoder): decoder to decode the response """ - def __init__( - self, - cursor_value: Union[InterpolatedString, str], - config: Config, - stop_condition: Optional[InterpolatedBoolean] = None, - decoder: Optional[Decoder] = None, - **options: Optional[Mapping[str, Any]], - ): - """ - :param cursor_value: template string evaluating to the cursor value - :param config: connection config - :param stop_condition: template string evaluating when to stop paginating - :param decoder: decoder to decode the response - :param options: Additional runtime parameters to be used for string interpolation - """ - if isinstance(cursor_value, str): - cursor_value = InterpolatedString.create(cursor_value, options=options) - self._cursor_value = cursor_value - self._config = 
config - self._decoder = decoder or JsonDecoder() - self._stop_condition = stop_condition + cursor_value: Union[InterpolatedString, str] + config: Config + options: InitVar[Mapping[str, Any]] + stop_condition: Optional[InterpolatedBoolean] = None + decoder: Decoder = JsonDecoder(options={}) + + def __post_init__(self, options: Mapping[str, Any]): + if isinstance(self.cursor_value, str): + self.cursor_value = InterpolatedString.create(self.cursor_value, options=options) def next_page_token(self, response: requests.Response, last_records: List[Mapping[str, Any]]) -> Optional[Any]: - decoded_response = self._decoder.decode(response) + decoded_response = self.decoder.decode(response) headers = response.headers - if self._stop_condition: - should_stop = self._stop_condition.eval(self._config, response=decoded_response, headers=headers, last_records=last_records) + if self.stop_condition: + should_stop = self.stop_condition.eval(self.config, response=decoded_response, headers=headers, last_records=last_records) if should_stop: return None - token = self._cursor_value.eval(config=self._config, last_records=last_records, response=decoded_response) + token = self.cursor_value.eval(config=self.config, last_records=last_records, response=decoded_response) return token if token else None diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py index ae5cd6cff0140..bfbd92df3e247 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py @@ -2,26 +2,31 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# +from dataclasses import InitVar, dataclass from typing import Any, List, Mapping, Optional import requests -from airbyte_cdk.sources.declarative.requesters.paginators.pagination_strategy import PaginationStrategy +from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import PaginationStrategy +from dataclasses_jsonschema import JsonSchemaMixin -class OffsetIncrement(PaginationStrategy): +@dataclass +class OffsetIncrement(PaginationStrategy, JsonSchemaMixin): """ Pagination strategy that returns the number of records reads so far and returns it as the next page token + + Attributes: + page_size (int): the number of records to request """ - def __init__(self, page_size: int): - """ - :param page_size: the number of records to request - """ + page_size: int + options: InitVar[Mapping[str, Any]] + + def __post_init__(self, options: Mapping[str, Any]): self._offset = 0 - self._page_size = page_size def next_page_token(self, response: requests.Response, last_records: List[Mapping[str, Any]]) -> Optional[Any]: - if len(last_records) < self._page_size: + if len(last_records) < self.page_size: return None else: self._offset += len(last_records) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py index e53479444cb58..f39ca388ada11 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py @@ -2,26 +2,31 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# +from dataclasses import InitVar, dataclass from typing import Any, List, Mapping, Optional import requests -from airbyte_cdk.sources.declarative.requesters.paginators.pagination_strategy import PaginationStrategy +from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import PaginationStrategy +from dataclasses_jsonschema import JsonSchemaMixin -class PageIncrement(PaginationStrategy): +@dataclass +class PageIncrement(PaginationStrategy, JsonSchemaMixin): """ Pagination strategy that returns the number of pages reads so far and returns it as the next page token + + Attributes: + page_size (int): the number of records to request """ - def __init__(self, page_size: int): - """ - :param page_size: the number of records to request - """ - self._page_size = page_size + page_size: int + options: InitVar[Mapping[str, Any]] + + def __post_init__(self, options: Mapping[str, Any]): self._offset = 0 def next_page_token(self, response: requests.Response, last_records: List[Mapping[str, Any]]) -> Optional[Any]: - if len(last_records) < self._page_size: + if len(last_records) < self.page_size: return None else: self._offset += 1 diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/pagination_strategy.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py similarity index 80% rename from airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/pagination_strategy.py rename to airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py index d839f5c35970f..7174fc16a377b 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/pagination_strategy.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py @@ -3,12 +3,15 @@ # from abc import abstractmethod +from dataclasses import dataclass from typing import 
Any, List, Mapping, Optional import requests +from dataclasses_jsonschema import JsonSchemaMixin -class PaginationStrategy: +@dataclass +class PaginationStrategy(JsonSchemaMixin): """ Defines how to get the next page token """ diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_option.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_option.py index 91221557cf4af..1ed01f34b87c3 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_option.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_option.py @@ -2,8 +2,11 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # +from dataclasses import InitVar, dataclass from enum import Enum -from typing import Optional +from typing import Any, Mapping, Optional + +from dataclasses_jsonschema import JsonSchemaMixin class RequestOptionType(Enum): @@ -18,34 +21,27 @@ class RequestOptionType(Enum): body_json = "body_json" -class RequestOption: +@dataclass +class RequestOption(JsonSchemaMixin): """ Describes an option to set on a request + + Attributes: + inject_into (RequestOptionType): Describes where in the HTTP request to inject the parameter + field_name (Optional[str]): Describes the name of the parameter to inject. None if option_type == path. Required otherwise. """ - def __init__(self, inject_into: RequestOptionType, field_name: Optional[str] = None): - """ - :param inject_into: where to set the value - :param field_name: field name to set. None if option_type == path. Required otherwise. - """ - self._option_type = inject_into - self._field_name = field_name - if self._option_type == RequestOptionType.path: - if self._field_name is not None: - raise ValueError(f"RequestOption with path cannot have a field name. 
Get {field_name}") - elif self._field_name is None: - raise ValueError(f"RequestOption expected field name for type {self._option_type}") - - @property - def inject_into(self) -> RequestOptionType: - """Describes where in the HTTP request to inject the parameter""" - return self._option_type - - @property - def field_name(self) -> Optional[str]: - """Describes the name of the parameter to inject""" - return self._field_name + inject_into: RequestOptionType + options: InitVar[Mapping[str, Any]] + field_name: Optional[str] = None + + def __post_init__(self, options: Mapping[str, Any]): + if self.inject_into == RequestOptionType.path: + if self.field_name is not None: + raise ValueError(f"RequestOption with path cannot have a field name. Get {self.field_name}") + elif self.field_name is None: + raise ValueError(f"RequestOption expected field name for type {self.inject_into}") def is_path(self) -> bool: """Returns true if the parameter is the path to send the request to""" - return self._option_type == RequestOptionType.path + return self.inject_into == RequestOptionType.path diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_options/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_options/__init__.py index 1100c1c58cf51..9bb93d757d126 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_options/__init__.py @@ -1,3 +1,10 @@ # # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# + +from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_request_options_provider import ( + InterpolatedRequestOptionsProvider, +) +from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import RequestOptionsProvider + +__all__ = ["InterpolatedRequestOptionsProvider", "RequestOptionsProvider"] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/interpolated_request_input_provider.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py similarity index 100% rename from airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/interpolated_request_input_provider.py rename to airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py index 793594eb4c119..6348ccc35884b 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py @@ -2,52 +2,56 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# +from dataclasses import InitVar, dataclass, field from typing import Any, Mapping, MutableMapping, Optional, Union -from airbyte_cdk.sources.declarative.requesters.interpolated_request_input_provider import InterpolatedRequestInputProvider +from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_request_input_provider import InterpolatedRequestInputProvider from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import RequestOptionsProvider from airbyte_cdk.sources.declarative.types import Config, StreamSlice, StreamState +from dataclasses_jsonschema import JsonSchemaMixin RequestInput = Union[str, Mapping[str, str]] -class InterpolatedRequestOptionsProvider(RequestOptionsProvider): - """Defines the request options to set on an outgoing HTTP request by evaluating `InterpolatedMapping`s""" +@dataclass +class InterpolatedRequestOptionsProvider(RequestOptionsProvider, JsonSchemaMixin): + """ + Defines the request options to set on an outgoing HTTP request by evaluating `InterpolatedMapping`s - def __init__( - self, - *, - config: Config, - request_parameters: Optional[RequestInput] = None, - request_headers: Optional[RequestInput] = None, - request_body_data: Optional[RequestInput] = None, - request_body_json: Optional[RequestInput] = None, - ): - """ - :param config: The user-provided configuration as specified by the source's spec - :param request_parameters: The request parameters to set on an outgoing HTTP request - :param request_headers: The request headers to set on an outgoing HTTP request - :param request_body_data: The body data to set on an outgoing HTTP request - :param request_body_json: The json content to set on an outgoing HTTP request - """ - if request_parameters is None: - request_parameters = {} - if request_headers is None: - request_headers = {} - if request_body_data is None: - request_body_data = {} - if request_body_json is None: - request_body_json = {} + Attributes: + config (Config): 
The user-provided configuration as specified by the source's spec + request_parameters (Union[str, Mapping[str, str]]): The request parameters to set on an outgoing HTTP request + request_headers (Union[str, Mapping[str, str]]): The request headers to set on an outgoing HTTP request + request_body_data (Union[str, Mapping[str, str]]): The body data to set on an outgoing HTTP request + request_body_json (Union[str, Mapping[str, str]]): The json content to set on an outgoing HTTP request + """ + + options: InitVar[Mapping[str, Any]] + config: Config = field(default_factory=dict) + request_parameters: Optional[RequestInput] = None + request_headers: Optional[RequestInput] = None + request_body_data: Optional[RequestInput] = None + request_body_json: Optional[RequestInput] = None + + def __post_init__(self, options: Mapping[str, Any]): + if self.request_parameters is None: + self.request_parameters = {} + if self.request_headers is None: + self.request_headers = {} + if self.request_body_data is None: + self.request_body_data = {} + if self.request_body_json is None: + self.request_body_json = {} - if request_body_json and request_body_data: + if self.request_body_json and self.request_body_data: raise ValueError("RequestOptionsProvider should only contain either 'request_body_data' or 'request_body_json' not both") - self._parameter_interpolator = InterpolatedRequestInputProvider(config=config, request_inputs=request_parameters) - self._headers_interpolator = InterpolatedRequestInputProvider(config=config, request_inputs=request_headers) - self._body_data_interpolator = InterpolatedRequestInputProvider(config=config, request_inputs=request_body_data) - self._body_json_interpolator = InterpolatedRequestInputProvider(config=config, request_inputs=request_body_json) + self._parameter_interpolator = InterpolatedRequestInputProvider(config=self.config, request_inputs=self.request_parameters) + self._headers_interpolator = 
InterpolatedRequestInputProvider(config=self.config, request_inputs=self.request_headers) + self._body_data_interpolator = InterpolatedRequestInputProvider(config=self.config, request_inputs=self.request_body_data) + self._body_json_interpolator = InterpolatedRequestInputProvider(config=self.config, request_inputs=self.request_body_json) - def request_params( + def get_request_params( self, *, stream_state: Optional[StreamState] = None, @@ -59,7 +63,7 @@ def request_params( return interpolated_value return {} - def request_headers( + def get_request_headers( self, *, stream_state: Optional[StreamState] = None, @@ -68,7 +72,7 @@ def request_headers( ) -> Mapping[str, Any]: return self._headers_interpolator.request_inputs(stream_state, stream_slice, next_page_token) - def request_body_data( + def get_request_body_data( self, *, stream_state: Optional[StreamState] = None, @@ -77,7 +81,7 @@ def request_body_data( ) -> Optional[Union[Mapping, str]]: return self._body_data_interpolator.request_inputs(stream_state, stream_slice, next_page_token) - def request_body_json( + def get_request_body_json( self, *, stream_state: Optional[StreamState] = None, diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py index 425107afe29a1..1be5fa690349f 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py @@ -3,11 +3,13 @@ # from abc import ABC, abstractmethod +from dataclasses import dataclass from typing import Any, Mapping, MutableMapping, Optional, Union from airbyte_cdk.sources.declarative.types import StreamSlice, StreamState +@dataclass class RequestOptionsProvider(ABC): """ Defines the request options to set on an outgoing HTTP request 
@@ -20,7 +22,7 @@ class RequestOptionsProvider(ABC): """ @abstractmethod - def request_params( + def get_request_params( self, *, stream_state: Optional[StreamState] = None, @@ -35,7 +37,7 @@ def request_params( pass @abstractmethod - def request_headers( + def get_request_headers( self, *, stream_state: Optional[StreamState] = None, @@ -45,7 +47,7 @@ def request_headers( """Return any non-auth headers. Authentication headers will overwrite any overlapping headers returned from this method.""" @abstractmethod - def request_body_data( + def get_request_body_data( self, *, stream_state: Optional[StreamState] = None, @@ -63,7 +65,7 @@ def request_body_data( """ @abstractmethod - def request_body_json( + def get_request_body_json( self, *, stream_state: Optional[StreamState] = None, diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py index 8b7d0e0450438..24c4211df5ed8 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/requester.py @@ -55,7 +55,7 @@ def get_method(self) -> HttpMethod: """ @abstractmethod - def request_params( + def get_request_params( self, *, stream_state: Optional[StreamState] = None, @@ -81,7 +81,7 @@ def should_retry(self, response: requests.Response) -> ResponseStatus: """ @abstractmethod - def request_headers( + def get_request_headers( self, *, stream_state: Optional[StreamState] = None, @@ -93,7 +93,7 @@ def request_headers( """ @abstractmethod - def request_body_data( + def get_request_body_data( self, *, stream_state: Optional[StreamState] = None, @@ -111,7 +111,7 @@ def request_body_data( """ @abstractmethod - def request_body_json( + def get_request_body_json( self, *, stream_state: Optional[StreamState] = None, diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/__init__.py 
b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/__init__.py index 1100c1c58cf51..9c47818b3e725 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/__init__.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/__init__.py @@ -1,3 +1,8 @@ # # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # + +from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever +from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever + +__all__ = ["Retriever", "SimpleRetriever"] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py index bda876c52951f..a9ae02806425a 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py @@ -3,12 +3,14 @@ # from abc import ABC, abstractmethod +from dataclasses import dataclass from typing import Iterable, List, Optional from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState +@dataclass class Retriever(ABC): """ Responsible for fetching a stream's records from an HTTP API source. diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py index b39a01d14a728..8eda1ec154012 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py @@ -2,6 +2,7 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# +from dataclasses import InitVar, dataclass, field from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union import requests @@ -17,9 +18,11 @@ from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState from airbyte_cdk.sources.streams.http import HttpStream +from dataclasses_jsonschema import JsonSchemaMixin -class SimpleRetriever(Retriever, HttpStream): +@dataclass +class SimpleRetriever(Retriever, HttpStream, JsonSchemaMixin): """ Retrieves records by synchronously sending requests to fetch records. @@ -30,34 +33,30 @@ class SimpleRetriever(Retriever, HttpStream): This retriever currently inherits from HttpStream to reuse the request submission and pagination machinery. As a result, some of the parameters passed to some methods are unused. The two will be decoupled in a future release. + + Attributes: + stream_name (str): The stream's name + stream_primary_key (Optional[Union[str, List[str], List[List[str]]]]): The stream's primary key + requester (Requester): The HTTP requester + record_selector (HttpSelector): The record selector + paginator (Optional[Paginator]): The paginator + stream_slicer (Optional[StreamSlicer]): The stream slicer + options (Mapping[str, Any]): Additional runtime parameters to be used for string interpolation """ - def __init__( - self, - name, - primary_key, - requester: Requester, - record_selector: HttpSelector, - paginator: Optional[Paginator] = None, - stream_slicer: Optional[StreamSlicer] = SingleSlice(), - **options: Optional[Mapping[str, Any]], - ): - """ - :param name: The stream's name - :param primary_key: The stream's primary key - :param requester: The HTTP requester - :param record_selector: The record selector - :param paginator: The paginator - :param stream_slicer: The stream slicer - :param options: Additional runtime parameters to be used for string interpolation - """ - self._name = 
name - self._primary_key = primary_key - self._paginator = paginator or NoPagination() - self._requester = requester - self._record_selector = record_selector - super().__init__(self._requester.get_authenticator()) - self._stream_slicer = stream_slicer + requester: Requester + record_selector: HttpSelector + options: InitVar[Mapping[str, Any]] + name: str + _name: str = field(init=False, repr=False) + primary_key: Optional[Union[str, List[str], List[List[str]]]] + _primary_key: str = field(init=False, repr=False) + paginator: Optional[Paginator] = None + stream_slicer: Optional[StreamSlicer] = SingleSlice(options={}) + + def __post_init__(self, options: Mapping[str, Any]): + self.paginator = self.paginator or NoPagination(options=options) + HttpStream.__init__(self, self.requester.get_authenticator()) self._last_response = None self._last_records = None @@ -68,13 +67,18 @@ def name(self) -> str: """ return self._name + @name.setter + def name(self, value: str) -> None: + if not isinstance(value, property): + self._name = value + @property def url_base(self) -> str: - return self._requester.get_url_base() + return self.requester.get_url_base() @property def http_method(self) -> str: - return str(self._requester.get_method().value) + return str(self.requester.get_method().value) @property def raise_on_http_errors(self) -> bool: @@ -91,7 +95,7 @@ def should_retry(self, response: requests.Response) -> bool: Unexpected but transient exceptions (connection timeout, DNS resolution failed, etc..) are retried by default. """ - return self._requester.should_retry(response).action == ResponseAction.RETRY + return self.requester.should_retry(response).action == ResponseAction.RETRY def backoff_time(self, response: requests.Response) -> Optional[float]: """ @@ -103,7 +107,7 @@ def backoff_time(self, response: requests.Response) -> Optional[float]: :return how long to backoff in seconds. The return value may be a floating point number for subsecond precision. 
Returning None defers backoff to the default backoff behavior (e.g using an exponential algorithm). """ - should_retry = self._requester.should_retry(response) + should_retry = self.requester.should_retry(response) if should_retry.action != ResponseAction.RETRY: raise ValueError(f"backoff_time can only be applied on retriable response action. Got {should_retry.action}") assert should_retry.action == ResponseAction.RETRY @@ -127,11 +131,12 @@ def _get_request_options( :param paginator_method: :return: """ - requester_mapping = requester_method(self.state, stream_slice, next_page_token) + + requester_mapping = requester_method(stream_state=self.state, stream_slice=stream_slice, next_page_token=next_page_token) requester_mapping_keys = set(requester_mapping.keys()) - paginator_mapping = paginator_method(self.state, stream_slice, next_page_token) + paginator_mapping = paginator_method(stream_state=self.state, stream_slice=stream_slice, next_page_token=next_page_token) paginator_mapping_keys = set(paginator_mapping.keys()) - stream_slicer_mapping = stream_slicer_method(stream_slice) + stream_slicer_mapping = stream_slicer_method(stream_slice=stream_slice) stream_slicer_mapping_keys = set(stream_slicer_mapping.keys()) intersection = ( @@ -153,9 +158,9 @@ def request_headers( return self._get_request_options( stream_slice, next_page_token, - self._requester.request_headers, - self._paginator.request_headers, - self._stream_slicer.request_headers, + self.requester.get_request_headers, + self.paginator.get_request_headers, + self.stream_slicer.get_request_headers, ) def request_params( @@ -172,9 +177,9 @@ def request_params( return self._get_request_options( stream_slice, next_page_token, - self._requester.request_params, - self._paginator.request_params, - self._stream_slicer.request_params, + self.requester.get_request_params, + self.paginator.get_request_params, + self.stream_slicer.get_request_params, ) def request_body_data( @@ -193,11 +198,11 @@ def request_body_data( 
At the same time only one of the 'request_body_data' and 'request_body_json' functions can be overridden. """ # Warning: use self.state instead of the stream_state passed as argument! - base_body_data = self._requester.request_body_data( + base_body_data = self.requester.get_request_body_data( stream_state=self.state, stream_slice=stream_slice, next_page_token=next_page_token ) if isinstance(base_body_data, str): - paginator_body_data = self._paginator.request_body_data() + paginator_body_data = self.paginator.get_request_body_data() if paginator_body_data: raise ValueError( f"Cannot combine requester's body data= {base_body_data} with paginator's body_data: {paginator_body_data}" @@ -207,9 +212,9 @@ def request_body_data( return self._get_request_options( stream_slice, next_page_token, - self._requester.request_body_data, - self._paginator.request_body_data, - self._stream_slicer.request_body_data, + self.requester.get_request_body_data, + self.paginator.get_request_body_data, + self.stream_slicer.get_request_body_data, ) def request_body_json( @@ -227,9 +232,9 @@ def request_body_json( return self._get_request_options( stream_slice, next_page_token, - self._requester.request_body_json, - self._paginator.request_body_json, - self._stream_slicer.request_body_json, + self.requester.get_request_body_json, + self.paginator.get_request_body_json, + self.stream_slicer.get_request_body_json, ) def request_kwargs( @@ -244,7 +249,7 @@ def request_kwargs( this method. Note that these options do not conflict with request-level options such as headers, request params, etc.. """ # Warning: use self.state instead of the stream_state passed as argument! 
- return self._requester.request_kwargs(stream_state=self.state, stream_slice=stream_slice, next_page_token=next_page_token) + return self.requester.request_kwargs(stream_state=self.state, stream_slice=stream_slice, next_page_token=next_page_token) def path( self, @@ -262,25 +267,25 @@ def path( :return: """ # Warning: use self.state instead of the stream_state passed as argument! - paginator_path = self._paginator.path() + paginator_path = self.paginator.path() if paginator_path: return paginator_path else: - return self._requester.get_path(stream_state=self.state, stream_slice=stream_slice, next_page_token=next_page_token) + return self.requester.get_path(stream_state=self.state, stream_slice=stream_slice, next_page_token=next_page_token) @property def cache_filename(self) -> str: """ Return the name of cache file """ - return self._requester.cache_filename + return self.requester.cache_filename @property def use_cache(self) -> bool: """ If True, all records will be cached. """ - return self._requester.use_cache + return self.requester.use_cache def parse_response( self, @@ -293,7 +298,7 @@ def parse_response( # if fail -> raise exception # if ignore -> ignore response and return no records # else -> delegate to record selector - response_status = self._requester.should_retry(response) + response_status = self.requester.should_retry(response) if response_status.action == ResponseAction.FAIL: raise ReadException(f"Request {response.request} failed with response {response}") elif response_status.action == ResponseAction.IGNORE: @@ -302,7 +307,7 @@ def parse_response( # Warning: use self.state instead of the stream_state passed as argument! 
self._last_response = response - records = self._record_selector.select_records( + records = self.record_selector.select_records( response=response, stream_state=self.state, stream_slice=stream_slice, next_page_token=next_page_token ) self._last_records = records @@ -313,6 +318,11 @@ def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: """The stream's primary key""" return self._primary_key + @primary_key.setter + def primary_key(self, value: str) -> None: + if not isinstance(value, property): + self._primary_key = value + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: """ Specifies a pagination strategy. @@ -321,7 +331,7 @@ def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, :return: The token for the next page from the input response object. Returning None means there are no more pages to read in this response. """ - return self._paginator.next_page_token(response, self._last_records) + return self.paginator.next_page_token(response, self._last_records) def read_records( self, @@ -334,11 +344,11 @@ def read_records( stream_slice = stream_slice or {} # None-check records_generator = HttpStream.read_records(self, sync_mode, cursor_field, stream_slice, self.state) for r in records_generator: - self._stream_slicer.update_cursor(stream_slice, last_record=r) + self.stream_slicer.update_cursor(stream_slice, last_record=r) yield r else: last_record = self._last_records[-1] if self._last_records else None - self._stream_slicer.update_cursor(stream_slice, last_record=last_record) + self.stream_slicer.update_cursor(stream_slice, last_record=last_record) yield from [] def stream_slices( @@ -353,13 +363,13 @@ def stream_slices( :return: """ # Warning: use self.state instead of the stream_state passed as argument! 
- return self._stream_slicer.stream_slices(sync_mode, self.state) + return self.stream_slicer.stream_slices(sync_mode, self.state) @property def state(self) -> MutableMapping[str, Any]: - return self._stream_slicer.get_stream_state() + return self.stream_slicer.get_stream_state() @state.setter def state(self, value: StreamState): """State setter, accept state serialized by state getter.""" - self._stream_slicer.update_cursor(value) + self.stream_slicer.update_cursor(value) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/schema/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/schema/__init__.py index 1100c1c58cf51..cbef6eb1d268b 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/schema/__init__.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/schema/__init__.py @@ -1,3 +1,8 @@ # # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # + +from airbyte_cdk.sources.declarative.schema.json_schema import JsonSchema +from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader + +__all__ = ["JsonSchema", "SchemaLoader"] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/schema/json_schema.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/schema/json_schema.py index 8e29bd329c716..e3a42dd04f17a 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/schema/json_schema.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/schema/json_schema.py @@ -3,25 +3,34 @@ # import json -from typing import Any, Mapping, Optional +from dataclasses import InitVar, dataclass +from typing import Any, Mapping, Union from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader from airbyte_cdk.sources.declarative.types import Config +from dataclasses_jsonschema import JsonSchemaMixin -class JsonSchema(SchemaLoader): - """Loads the schema from a json file""" +@dataclass 
+class JsonSchema(SchemaLoader, JsonSchemaMixin): + """ + Loads the schema from a json file - def __init__(self, file_path: InterpolatedString, config: Config, **options: Optional[Mapping[str, Any]]): - """ - :param file_path: The path to the json file describing the schema - :param config: The user-provided configuration as specified by the source's spec - :param options: Additional arguments to pass to the string interpolation if needed - """ - self._file_path = file_path - self._config = config - self._options = options + Attributes: + file_path (Union[InterpolatedString, str]): The path to the json file describing the schema + name (str): The stream's name + config (Config): The user-provided configuration as specified by the source's spec + options (Mapping[str, Any]): Additional arguments to pass to the string interpolation if needed + """ + + file_path: Union[InterpolatedString, str] + name: str + config: Config + options: InitVar[Mapping[str, Any]] + + def __post_init__(self, options: Mapping[str, Any]): + self.file_path = InterpolatedString.create(self.file_path, options=options) def get_json_schema(self) -> Mapping[str, Any]: json_schema_path = self._get_json_filepath() @@ -29,4 +38,4 @@ def get_json_schema(self) -> Mapping[str, Any]: return json.loads(f.read()) def _get_json_filepath(self): - return self._file_path.eval(self._config, **self._options) + return self.file_path.eval(self.config) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/schema/schema_loader.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/schema/schema_loader.py index 57ce7ca8b0b7e..3a0d45316a4e3 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/schema/schema_loader.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/schema/schema_loader.py @@ -3,9 +3,11 @@ # from abc import ABC, abstractmethod +from dataclasses import dataclass from typing import Any, Mapping +@dataclass class SchemaLoader(ABC): """Describes a stream's schema""" diff --git 
a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/__init__.py index 1100c1c58cf51..5fcd546f87bd4 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/__init__.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/__init__.py @@ -1,3 +1,12 @@ # # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # + +from airbyte_cdk.sources.declarative.stream_slicers.cartesian_product_stream_slicer import CartesianProductStreamSlicer +from airbyte_cdk.sources.declarative.stream_slicers.datetime_stream_slicer import DatetimeStreamSlicer +from airbyte_cdk.sources.declarative.stream_slicers.list_stream_slicer import ListStreamSlicer +from airbyte_cdk.sources.declarative.stream_slicers.single_slice import SingleSlice +from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer +from airbyte_cdk.sources.declarative.stream_slicers.substream_slicer import SubstreamSlicer + +__all__ = ["CartesianProductStreamSlicer", "DatetimeStreamSlicer", "ListStreamSlicer", "SingleSlice", "StreamSlicer", "SubstreamSlicer"] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py index 9c52c07abc874..1004b6a7ecee5 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/cartesian_product_stream_slicer.py @@ -4,14 +4,17 @@ import itertools from collections import ChainMap +from dataclasses import InitVar, dataclass from typing import Any, Iterable, List, Mapping, Optional from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer from airbyte_cdk.sources.declarative.types import 
StreamSlice, StreamState +from dataclasses_jsonschema import JsonSchemaMixin -class CartesianProductStreamSlicer(StreamSlicer): +@dataclass +class CartesianProductStreamSlicer(StreamSlicer, JsonSchemaMixin): """ Stream slicers that iterates over the cartesian product of input stream slicers Given 2 stream slicers with the following slices: @@ -26,57 +29,78 @@ class CartesianProductStreamSlicer(StreamSlicer): {"i": 2, "s": "hello"}, {"i": 2, "s": "world"}, ] + + Attributes: + stream_slicers (List[StreamSlicer]): Underlying stream slicers. The RequestOptions (e.g: Request headers, parameters, etc..) returned by this slicer are the combination of the RequestOptions of its input slicers. If there are conflicts e.g: two slicers define the same header or request param, the conflict is resolved by taking the value from the first slicer, where ordering is determined by the order in which slicers were input to this composite slicer. """ - def __init__(self, stream_slicers: List[StreamSlicer]): - """ - :param stream_slicers: Underlying stream slicers. The RequestOptions (e.g: Request headers, parameters, etc..) returned by this slicer are the combination of the RequestOptions of its input slicers. If there are conflicts e.g: two slicers define the same header or request param, the conflict is resolved by taking the value from the first slicer, where ordering is determined by the order in which slicers were input to this composite slicer. 
- """ - self._stream_slicers = stream_slicers + stream_slicers: List[StreamSlicer] + options: InitVar[Mapping[str, Any]] def update_cursor(self, stream_slice: Mapping[str, Any], last_record: Optional[Mapping[str, Any]] = None): - for slicer in self._stream_slicers: + for slicer in self.stream_slicers: slicer.update_cursor(stream_slice, last_record) - def request_params( + def get_request_params( self, *, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None, ) -> Mapping[str, Any]: - return dict(ChainMap(*[s.request_params() for s in self._stream_slicers])) + return dict(ChainMap(*[s.get_request_params() for s in self.stream_slicers])) - def request_headers( + def get_request_headers( self, *, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None, ) -> Mapping[str, Any]: - return dict(ChainMap(*[s.request_headers(stream_state, stream_slice, next_page_token) for s in self._stream_slicers])) + return dict( + ChainMap( + *[ + s.get_request_headers(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token) + for s in self.stream_slicers + ] + ) + ) - def request_body_data( + def get_request_body_data( self, *, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = None, ) -> Mapping[str, Any]: - return dict(ChainMap(*[s.request_body_data(stream_state, stream_slice, next_page_token) for s in self._stream_slicers])) + return dict( + ChainMap( + *[ + s.get_request_body_data(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token) + for s in self.stream_slicers + ] + ) + ) - def request_body_json( + def get_request_body_json( self, *, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, next_page_token: Optional[Mapping[str, Any]] = 
None, ) -> Optional[Mapping]: - return dict(ChainMap(*[s.request_body_json(stream_state, stream_slice, next_page_token) for s in self._stream_slicers])) + return dict( + ChainMap( + *[ + s.get_request_body_json(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token) + for s in self.stream_slicers + ] + ) + ) def get_stream_state(self) -> Mapping[str, Any]: - return dict(ChainMap(*[slicer.get_stream_state() for slicer in self._stream_slicers])) + return dict(ChainMap(*[slicer.get_stream_state() for slicer in self.stream_slicers])) def stream_slices(self, sync_mode: SyncMode, stream_state: Mapping[str, Any]) -> Iterable[Mapping[str, Any]]: - sub_slices = (s.stream_slices(sync_mode, stream_state) for s in self._stream_slicers) + sub_slices = (s.stream_slices(sync_mode, stream_state) for s in self.stream_slicers) return (ChainMap(*a) for a in itertools.product(*sub_slices)) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py index 772a8fc51e251..351c4be8b2d4b 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py @@ -4,6 +4,7 @@ import datetime import re +from dataclasses import InitVar, dataclass, field from typing import Any, Iterable, Mapping, Optional import dateutil @@ -14,9 +15,11 @@ from airbyte_cdk.sources.declarative.requesters.request_option import RequestOption, RequestOptionType from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState +from dataclasses_jsonschema import JsonSchemaMixin -class DatetimeStreamSlicer(StreamSlicer): +@dataclass +class DatetimeStreamSlicer(StreamSlicer, JsonSchemaMixin): """ Slices the stream 
over a datetime range. @@ -31,70 +34,60 @@ class DatetimeStreamSlicer(StreamSlicer): - days, d For example, "1d" will produce windows of 1 day, and 2weeks windows of 2 weeks. + + Attributes: + start_datetime (MinMaxDatetime): the datetime that determines the earliest record that should be synced + end_datetime (MinMaxDatetime): the datetime that determines the last record that should be synced + step (str): size of the timewindow + cursor_field (InterpolatedString): record's cursor field + datetime_format (str): format of the datetime + config (Config): connection config + start_time_option (Optional[RequestOption]): request option for start time + end_time_option (Optional[RequestOption]): request option for end time + stream_state_field_start (Optional[str]): stream slice start time field + stream_state_field_end (Optional[str]): stream slice end time field + lookback_window (Optional[InterpolatedString]): how many days before start_datetime to read data for """ + start_datetime: MinMaxDatetime + end_datetime: MinMaxDatetime + step: str + cursor_field: InterpolatedString + datetime_format: str + config: Config + options: InitVar[Mapping[str, Any]] + _cursor: dict = field(repr=False, default=None) # tracks current datetime + _cursor_end: dict = field(repr=False, default=None) # tracks end of current stream slice + start_time_option: Optional[RequestOption] = None + end_time_option: Optional[RequestOption] = None + stream_state_field_start: Optional[str] = None + stream_state_field_end: Optional[str] = None + lookback_window: Optional[InterpolatedString] = None + timedelta_regex = re.compile(r"((?P<weeks>[\.\d]+?)w)?" 
r"((?P<days>[\.\d]+?)d)?$") - def __init__( - self, - start_datetime: MinMaxDatetime, - end_datetime: MinMaxDatetime, - step: str, - cursor_field: InterpolatedString, - datetime_format: str, - config: Config, - start_time_option: Optional[RequestOption] = None, - end_time_option: Optional[RequestOption] = None, - stream_state_field_start: Optional[str] = None, - stream_state_field_end: Optional[str] = None, - lookback_window: Optional[InterpolatedString] = None, - **options: Optional[Mapping[str, Any]], - ): - """ - :param start_datetime: - :param end_datetime: - :param step: size of the timewindow - :param cursor_field: record's cursor field - :param datetime_format: format of the datetime - :param config: connection config - :param start_time_option: request option for start time - :param end_time_option: request option for end time - :param stream_state_field_start: stream slice start time field - :param stream_state_field_end: stream slice end time field - :param lookback_window: how many days before start_datetime to read data for - :param options: Additional runtime parameters to be used for string interpolation - """ + def __post_init__(self, options: Mapping[str, Any]): self._timezone = datetime.timezone.utc self._interpolation = JinjaInterpolation() - self._datetime_format = datetime_format - self._start_datetime = start_datetime - self._end_datetime = end_datetime - self._step = self._parse_timedelta(step) - self._config = config - self._cursor_field = InterpolatedString.create(cursor_field, options=options) - self._start_time_option = start_time_option - self._end_time_option = end_time_option - self._stream_slice_field_start = InterpolatedString.create(stream_state_field_start or "start_time", options=options) - self._stream_slice_field_end = InterpolatedString.create(stream_state_field_end or "end_time", options=options) - self._cursor = None # tracks current datetime - self._cursor_end = None # tracks end of current stream slice - self._lookback_window = 
lookback_window - self._options = options + self._step = self._parse_timedelta(self.step) + self.cursor_field = InterpolatedString.create(self.cursor_field, options=options) + self.stream_slice_field_start = InterpolatedString.create(self.stream_state_field_start or "start_time", options=options) + self.stream_slice_field_end = InterpolatedString.create(self.stream_state_field_end or "end_time", options=options) # If datetime format is not specified then start/end datetime should inherit it from the stream slicer - if not self._start_datetime.datetime_format: - self._start_datetime.datetime_format = self._datetime_format - if not self._end_datetime.datetime_format: - self._end_datetime.datetime_format = self._datetime_format + if not self.start_datetime.datetime_format: + self.start_datetime.datetime_format = self.datetime_format + if not self.end_datetime.datetime_format: + self.end_datetime.datetime_format = self.datetime_format - if self._start_time_option and self._start_time_option.inject_into == RequestOptionType.path: + if self.start_time_option and self.start_time_option.inject_into == RequestOptionType.path: raise ValueError("Start time cannot be passed by path") - if self._end_time_option and self._end_time_option.inject_into == RequestOptionType.path: + if self.end_time_option and self.end_time_option.inject_into == RequestOptionType.path: raise ValueError("End time cannot be passed by path") def get_stream_state(self) -> StreamState: - return {self._cursor_field.eval(self._config): self._cursor} if self._cursor else {} + return {self.cursor_field.eval(self.config): self._cursor} if self._cursor else {} def update_cursor(self, stream_slice: StreamSlice, last_record: Optional[Record] = None): """ @@ -105,9 +98,9 @@ def update_cursor(self, stream_slice: StreamSlice, last_record: Optional[Record] :param last_record: last record read :return: None """ - stream_slice_value = stream_slice.get(self._cursor_field.eval(self._config)) - stream_slice_value_end = 
stream_slice.get(self._stream_slice_field_end.eval(self._config)) - last_record_value = last_record.get(self._cursor_field.eval(self._config)) if last_record else None + stream_slice_value = stream_slice.get(self.cursor_field.eval(self.config)) + stream_slice_value_end = stream_slice.get(self.stream_slice_field_end.eval(self.config)) + last_record_value = last_record.get(self.cursor_field.eval(self.config)) if last_record else None cursor = None if stream_slice_value and last_record_value: cursor = max(stream_slice_value, last_record_value) @@ -119,7 +112,7 @@ def update_cursor(self, stream_slice: StreamSlice, last_record: Optional[Record] self._cursor = max(cursor, self._cursor) elif cursor: self._cursor = cursor - if self._stream_slice_field_end: + if self.stream_slice_field_end: self._cursor_end = stream_slice_value_end def stream_slices(self, sync_mode: SyncMode, stream_state: Mapping[str, Any]) -> Iterable[Mapping[str, Any]]: @@ -135,18 +128,18 @@ def stream_slices(self, sync_mode: SyncMode, stream_state: Mapping[str, Any]) -> """ stream_state = stream_state or {} kwargs = {"stream_state": stream_state} - end_datetime = min(self._end_datetime.get_datetime(self._config, **kwargs), datetime.datetime.now(tz=datetime.timezone.utc)) - lookback_delta = self._parse_timedelta(self._lookback_window.eval(self._config, **kwargs) if self._lookback_window else "0d") - start_datetime = self._start_datetime.get_datetime(self._config, **kwargs) - lookback_delta + end_datetime = min(self.end_datetime.get_datetime(self.config, **kwargs), datetime.datetime.now(tz=datetime.timezone.utc)) + lookback_delta = self._parse_timedelta(self.lookback_window.eval(self.config, **kwargs) if self.lookback_window else "0d") + start_datetime = self.start_datetime.get_datetime(self.config, **kwargs) - lookback_delta start_datetime = min(start_datetime, end_datetime) - if self._cursor_field.eval(self._config, stream_state=stream_state) in stream_state: - cursor_datetime = 
self.parse_date(stream_state[self._cursor_field.eval(self._config)]) + if self.cursor_field.eval(self.config, stream_state=stream_state) in stream_state: + cursor_datetime = self.parse_date(stream_state[self.cursor_field.eval(self.config)]) else: cursor_datetime = start_datetime start_datetime = max(cursor_datetime, start_datetime) - state_date = self.parse_date(stream_state.get(self._cursor_field.eval(self._config, stream_state=stream_state))) + state_date = self.parse_date(stream_state.get(self.cursor_field.eval(self.config, stream_state=stream_state))) if state_date: # If the input_state's date is greater than start_datetime, the start of the time window is the state's next day next_date = state_date + datetime.timedelta(days=1) @@ -155,14 +148,14 @@ def stream_slices(self, sync_mode: SyncMode, stream_state: Mapping[str, Any]) -> return dates def _format_datetime(self, dt: datetime.datetime): - if self._datetime_format == "timestamp": + if self.datetime_format == "timestamp": return dt.timestamp() else: - return dt.strftime(self._datetime_format) + return dt.strftime(self.datetime_format) def _partition_daterange(self, start, end, step: datetime.timedelta): - start_field = self._stream_slice_field_start.eval(self._config) - end_field = self._stream_slice_field_end.eval(self._config) + start_field = self.stream_slice_field_start.eval(self.config) + end_field = self.stream_slice_field_end.eval(self.config) dates = [] while start <= end: end_date = self._get_date(start + step - datetime.timedelta(days=1), end, min) @@ -206,7 +199,7 @@ def _parse_timedelta(cls, time_str): time_params = {name: float(param) for name, param in parts.groupdict().items() if param} return datetime.timedelta(**time_params) - def request_params( + def get_request_params( self, *, stream_state: Optional[StreamState] = None, @@ -215,7 +208,7 @@ def request_params( ) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.request_parameter, stream_slice) - def request_headers( 
+ def get_request_headers( self, *, stream_state: Optional[StreamState] = None, @@ -224,7 +217,7 @@ def request_headers( ) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.header, stream_slice) - def request_body_data( + def get_request_body_data( self, *, stream_state: Optional[StreamState] = None, @@ -233,7 +226,7 @@ def request_body_data( ) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.body_data, stream_slice) - def request_body_json( + def get_request_body_json( self, *, stream_state: Optional[StreamState] = None, @@ -248,10 +241,8 @@ def request_kwargs(self) -> Mapping[str, Any]: def _get_request_options(self, option_type: RequestOptionType, stream_slice: StreamSlice): options = {} - if self._start_time_option and self._start_time_option.inject_into == option_type: - options[self._start_time_option.field_name] = stream_slice.get( - self._stream_slice_field_start.eval(self._config, **self._options) - ) - if self._end_time_option and self._end_time_option.inject_into == option_type: - options[self._end_time_option.field_name] = stream_slice.get(self._stream_slice_field_end.eval(self._config, **self._options)) + if self.start_time_option and self.start_time_option.inject_into == option_type: + options[self.start_time_option.field_name] = stream_slice.get(self.stream_slice_field_start.eval(self.config)) + if self.end_time_option and self.end_time_option.inject_into == option_type: + options[self.end_time_option.field_name] = stream_slice.get(self.stream_slice_field_end.eval(self.config)) return options diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/list_stream_slicer.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/list_stream_slicer.py index 2dbea6841de84..ac83d1a967cf9 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/list_stream_slicer.py +++ 
b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/list_stream_slicer.py @@ -2,6 +2,7 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # +from dataclasses import InitVar, dataclass from typing import Any, Iterable, List, Mapping, Optional, Union from airbyte_cdk.models import SyncMode @@ -9,51 +10,47 @@ from airbyte_cdk.sources.declarative.requesters.request_option import RequestOption, RequestOptionType from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState +from dataclasses_jsonschema import JsonSchemaMixin -class ListStreamSlicer(StreamSlicer): +@dataclass +class ListStreamSlicer(StreamSlicer, JsonSchemaMixin): """ Stream slicer that iterates over the values of a list If slice_values is a string, then evaluate it as literal and assert the resulting literal is a list + + Attributes: + slice_values (Union[str, List[str]]): The values to iterate over + cursor_field (Union[InterpolatedString, str]): The name of the cursor field + config (Config): The user-provided configuration as specified by the source's spec + request_option (Optional[RequestOption]): The request option to configure the HTTP request """ - def __init__( - self, - slice_values: Union[str, List[str]], - cursor_field: Union[InterpolatedString, str], - config: Config, - request_option: Optional[RequestOption] = None, - **options: Optional[Mapping[str, Any]], - ): - """ - :param slice_values: The values to iterate over - :param cursor_field: The name of the cursor field - :param config: The user-provided configuration as specified by the source's spec - :param request_option: The request option to configure the HTTP request - :param options: Additional runtime parameters to be used for string interpolation - """ - if isinstance(slice_values, str): - slice_values = InterpolatedString.create(slice_values, options=options).eval(config) - if 
isinstance(cursor_field, str): - cursor_field = InterpolatedString(cursor_field, options=options) - self._cursor_field = cursor_field - self._slice_values = slice_values - self._config = config - self._cursor = None - self._request_option = request_option + slice_values: Union[str, List[str]] + cursor_field: Union[InterpolatedString, str] + config: Config + options: InitVar[Mapping[str, Any]] + request_option: Optional[RequestOption] = None - if request_option and request_option.inject_into == RequestOptionType.path: + def __post_init__(self, options: Mapping[str, Any]): + if isinstance(self.slice_values, str): + self.slice_values = InterpolatedString.create(self.slice_values, options=options).eval(self.config) + if isinstance(self.cursor_field, str): + self.cursor_field = InterpolatedString(string=self.cursor_field, options=options) + + if self.request_option and self.request_option.inject_into == RequestOptionType.path: raise ValueError("Slice value cannot be injected in the path") + self._cursor = None def update_cursor(self, stream_slice: StreamSlice, last_record: Optional[Record] = None): - slice_value = stream_slice.get(self._cursor_field.eval(self._config)) - if slice_value and slice_value in self._slice_values: + slice_value = stream_slice.get(self.cursor_field.eval(self.config)) + if slice_value and slice_value in self.slice_values: self._cursor = slice_value def get_stream_state(self) -> StreamState: - return {self._cursor_field.eval(self._config): self._cursor} if self._cursor else {} + return {self.cursor_field.eval(self.config): self._cursor} if self._cursor else {} - def request_params( + def get_request_params( self, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, @@ -61,7 +58,7 @@ def request_params( ) -> Mapping[str, Any]: return self._get_request_option(RequestOptionType.request_parameter) - def request_headers( + def get_request_headers( self, stream_state: Optional[StreamState] = None, stream_slice: 
Optional[StreamSlice] = None, @@ -69,7 +66,7 @@ def request_headers( ) -> Mapping[str, Any]: return self._get_request_option(RequestOptionType.header) - def request_body_data( + def get_request_body_data( self, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, @@ -77,7 +74,7 @@ def request_body_data( ) -> Mapping[str, Any]: return self._get_request_option(RequestOptionType.body_data) - def request_body_json( + def get_request_body_json( self, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, @@ -86,10 +83,10 @@ def request_body_json( return self._get_request_option(RequestOptionType.body_json) def stream_slices(self, sync_mode: SyncMode, stream_state: Mapping[str, Any]) -> Iterable[Mapping[str, Any]]: - return [{self._cursor_field.eval(self._config): slice_value} for slice_value in self._slice_values] + return [{self.cursor_field.eval(self.config): slice_value} for slice_value in self.slice_values] def _get_request_option(self, request_option_type: RequestOptionType): - if self._request_option and self._request_option.inject_into == request_option_type: - return {self._request_option.field_name: self._cursor} + if self.request_option and self.request_option.inject_into == request_option_type: + return {self.request_option.field_name: self._cursor} else: return {} diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/single_slice.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/single_slice.py index 161cdae970ffb..532982de9d088 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/single_slice.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/single_slice.py @@ -2,18 +2,20 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# +from dataclasses import InitVar, dataclass from typing import Any, Iterable, Mapping, Optional from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState +from dataclasses_jsonschema import JsonSchemaMixin -class SingleSlice(StreamSlicer): +@dataclass +class SingleSlice(StreamSlicer, JsonSchemaMixin): """Stream slicer returning only a single stream slice""" - def __init__(self, **options): - pass + options: InitVar[Mapping[str, Any]] def update_cursor(self, stream_slice: StreamSlice, last_record: Optional[Record] = None): pass @@ -21,7 +23,7 @@ def update_cursor(self, stream_slice: StreamSlice, last_record: Optional[Record] def get_stream_state(self) -> StreamState: return {} - def request_params( + def get_request_params( self, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, @@ -29,7 +31,7 @@ def request_params( ) -> Mapping[str, Any]: return {} - def request_headers( + def get_request_headers( self, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, @@ -37,7 +39,7 @@ def request_headers( ) -> Mapping[str, Any]: return {} - def request_body_data( + def get_request_body_data( self, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, @@ -45,7 +47,7 @@ def request_body_data( ) -> Mapping[str, Any]: return {} - def request_body_json( + def get_request_body_json( self, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py index 025aa2bf1556e..4ff22ce12c611 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +++ 
b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py @@ -3,6 +3,7 @@ # from abc import abstractmethod +from dataclasses import dataclass from typing import Iterable, Optional from airbyte_cdk.models import SyncMode @@ -10,6 +11,7 @@ from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState +@dataclass class StreamSlicer(RequestOptionsProvider): """ Slices the stream into a subset of records. diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/substream_slicer.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/substream_slicer.py index b387a70275090..d5b8b306b86dd 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/substream_slicer.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/substream_slicer.py @@ -2,7 +2,7 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # -from dataclasses import dataclass +from dataclasses import InitVar, dataclass from typing import Any, Iterable, List, Mapping, Optional from airbyte_cdk.models import SyncMode @@ -10,6 +10,7 @@ from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState from airbyte_cdk.sources.streams.core import Stream +from dataclasses_jsonschema import JsonSchemaMixin @dataclass @@ -26,35 +27,38 @@ class ParentStreamConfig: stream: Stream parent_key: str stream_slice_field: str + options: InitVar[Mapping[str, Any]] request_option: Optional[RequestOption] = None -class SubstreamSlicer(StreamSlicer): +@dataclass +class SubstreamSlicer(StreamSlicer, JsonSchemaMixin): """ Stream slicer that iterates over the parent's stream slices and records and emits slices by interpolating the slice_definition mapping Will populate the state with `parent_stream_slice` and `parent_record` so they can be accessed by other components + + Attributes: + 
parent_stream_configs (List[ParentStreamConfig]): parent streams to iterate over and their config """ - def __init__(self, parent_streams_configs: List[ParentStreamConfig], **options: Optional[Mapping[str, Any]]): - """ - :param parent_streams_configs: parent streams to iterate over and their config - :param options: Additional runtime parameters to be used for string interpolation - """ - if not parent_streams_configs: + parent_stream_configs: List[ParentStreamConfig] + options: InitVar[Mapping[str, Any]] + + def __post_init__(self, options: Mapping[str, Any]): + if not self.parent_stream_configs: raise ValueError("SubstreamSlicer needs at least 1 parent stream") - self._parent_stream_configs = parent_streams_configs self._cursor = None self._options = options def update_cursor(self, stream_slice: StreamSlice, last_record: Optional[Record] = None): cursor = {} - for parent_stream_config in self._parent_stream_configs: + for parent_stream_config in self.parent_stream_configs: slice_value = stream_slice.get(parent_stream_config.stream_slice_field) if slice_value: cursor.update({parent_stream_config.stream_slice_field: slice_value}) self._cursor = cursor - def request_params( + def get_request_params( self, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, @@ -62,7 +66,7 @@ def request_params( ) -> Mapping[str, Any]: return self._get_request_option(RequestOptionType.request_parameter) - def request_headers( + def get_request_headers( self, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, @@ -70,7 +74,7 @@ def request_headers( ) -> Mapping[str, Any]: return self._get_request_option(RequestOptionType.header) - def request_body_data( + def get_request_body_data( self, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, @@ -78,7 +82,7 @@ def request_body_data( ) -> Mapping[str, Any]: return self._get_request_option(RequestOptionType.body_data) - def 
request_body_json( + def get_request_body_json( self, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, @@ -88,7 +92,7 @@ def request_body_json( def _get_request_option(self, option_type: RequestOptionType): params = {} - for parent_config in self._parent_stream_configs: + for parent_config in self.parent_stream_configs: if parent_config.request_option and parent_config.request_option.inject_into == option_type: key = parent_config.stream_slice_field value = self._cursor.get(key) @@ -114,10 +118,10 @@ def stream_slices(self, sync_mode: SyncMode, stream_state: StreamState) -> Itera - parent_record: mapping representing the parent record - parent_stream_name: string representing the parent stream name """ - if not self._parent_stream_configs: + if not self.parent_stream_configs: yield from [] else: - for parent_stream_config in self._parent_stream_configs: + for parent_stream_config in self.parent_stream_configs: parent_stream = parent_stream_config.stream parent_field = parent_stream_config.parent_key stream_state_field = parent_stream_config.stream_slice_field diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/add_fields.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/add_fields.py index d28e0941fc263..51ed5468acbd3 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/add_fields.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/add_fields.py @@ -2,13 +2,14 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# -from dataclasses import dataclass +from dataclasses import InitVar, dataclass, field from typing import Any, List, Mapping, Optional, Union import dpath.util from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.transformations import RecordTransformation from airbyte_cdk.sources.declarative.types import Config, FieldPointer, Record, StreamSlice, StreamState +from dataclasses_jsonschema import JsonSchemaMixin @dataclass(frozen=True) @@ -17,6 +18,7 @@ class AddedFieldDefinition: path: FieldPointer value: Union[InterpolatedString, str] + options: InitVar[Mapping[str, Any]] @dataclass(frozen=True) @@ -25,9 +27,11 @@ class ParsedAddFieldDefinition: path: FieldPointer value: InterpolatedString + options: InitVar[Mapping[str, Any]] -class AddFields(RecordTransformation): +@dataclass +class AddFields(RecordTransformation, JsonSchemaMixin): """ Transformation which adds field to an output record. The path of the added field can be nested. Adding nested fields will create all necessary parent objects (like mkdir -p). 
Adding fields to an array will extend the array to that index (filling intermediate @@ -73,25 +77,31 @@ class AddFields(RecordTransformation): # by supplying any valid Jinja template directive or expression https://jinja.palletsprojects.com/en/3.1.x/templates/# - path: ["two_times_two"] value: {{ 2 * 2 }} + + Attributes: + fields (List[AddedFieldDefinition]): A list of transformations (path and corresponding value) that will be added to the record """ - def __init__(self, fields: List[AddedFieldDefinition], **options: Optional[Mapping[str, Any]]): - """ - :param fields: Fields to add - :param options: Additional runtime parameters to be used for string interpolation - """ - self._fields: List[ParsedAddFieldDefinition] = [] - for field in fields: - if len(field.path) < 1: - raise f"Expected a non-zero-length path for the AddFields transformation {field}" - - if not isinstance(field.value, InterpolatedString): - if not isinstance(field.value, str): - raise f"Expected a string value for the AddFields transformation: {field}" + fields: List[AddedFieldDefinition] + options: InitVar[Mapping[str, Any]] + _parsed_fields: List[ParsedAddFieldDefinition] = field(init=False, repr=False, default_factory=list) + + def __post_init__(self, options: Mapping[str, Any]): + for add_field in self.fields: + if len(add_field.path) < 1: + raise f"Expected a non-zero-length path for the AddFields transformation {add_field}" + + if not isinstance(add_field.value, InterpolatedString): + if not isinstance(add_field.value, str): + raise f"Expected a string value for the AddFields transformation: {add_field}" else: - self._fields.append(ParsedAddFieldDefinition(field.path, InterpolatedString.create(field.value, options=options))) + self._parsed_fields.append( + ParsedAddFieldDefinition( + add_field.path, InterpolatedString.create(add_field.value, options=options), options=options + ) + ) else: - self._fields.append(ParsedAddFieldDefinition(field.path, field.value)) + 
self._parsed_fields.append(ParsedAddFieldDefinition(add_field.path, add_field.value, options={})) def transform( self, @@ -101,9 +111,9 @@ def transform( stream_slice: Optional[StreamSlice] = None, ) -> Record: kwargs = {"record": record, "stream_state": stream_state, "stream_slice": stream_slice} - for field in self._fields: - value = field.value.eval(config, **kwargs) - dpath.util.new(record, field.path, value) + for parsed_field in self._parsed_fields: + value = parsed_field.value.eval(config, **kwargs) + dpath.util.new(record, parsed_field.path, value) return record diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/remove_fields.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/remove_fields.py index 792c39e95fc1f..7c568a45941ed 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/remove_fields.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/remove_fields.py @@ -2,15 +2,18 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # -from typing import List +from dataclasses import InitVar, dataclass +from typing import Any, List, Mapping import dpath.exceptions import dpath.util from airbyte_cdk.sources.declarative.transformations import RecordTransformation from airbyte_cdk.sources.declarative.types import FieldPointer, Record +from dataclasses_jsonschema import JsonSchemaMixin -class RemoveFields(RecordTransformation): +@dataclass +class RemoveFields(RecordTransformation, JsonSchemaMixin): """ A transformation which removes fields from a record. The fields removed are designated using FieldPointers. During transformation, if a field or any of its parents does not exist in the record, no error is thrown. 
@@ -31,20 +34,20 @@ class RemoveFields(RecordTransformation): - ["path", "to", "field1"] - ["path2"] ``` + + Attributes: + field_pointers (List[FieldPointer]): pointers to the fields that should be removed """ - def __init__(self, field_pointers: List[FieldPointer]): - """ - :param field_pointers: pointers to the fields that should be removed - """ - self._field_pointers = field_pointers + field_pointers: List[FieldPointer] + options: InitVar[Mapping[str, Any]] def transform(self, record: Record, **kwargs) -> Record: """ :param record: The record to be transformed :return: the input record with the requested fields removed """ - for pointer in self._field_pointers: + for pointer in self.field_pointers: # the dpath library by default doesn't delete fields from arrays try: dpath.util.delete(record, pointer) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/transformation.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/transformation.py index f7c0d8c9ce6d1..1b2c429687d0a 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/transformation.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/transformation.py @@ -3,11 +3,13 @@ # from abc import ABC, abstractmethod +from dataclasses import dataclass from typing import Optional from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState +@dataclass class RecordTransformation(ABC): """ Implementations of this class define transformations that can be applied to records of a stream. 
diff --git a/airbyte-cdk/python/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py b/airbyte-cdk/python/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py index cbad72a4f3a8f..2a8bd72833715 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py @@ -32,15 +32,15 @@ def get_access_token(self) -> str: t0 = pendulum.now() token, expires_in = self.refresh_access_token() self.access_token = token - self.token_expiry_date = t0.add(seconds=expires_in) + self.set_token_expiry_date(t0.add(seconds=expires_in)) return self.access_token def token_has_expired(self) -> bool: """Returns True if the token is expired""" - return pendulum.now() > self.token_expiry_date + return pendulum.now() > self.get_token_expiry_date() - def get_refresh_request_body(self) -> Mapping[str, Any]: + def build_refresh_request_body(self) -> Mapping[str, Any]: """ Returns the request body to set on the refresh request @@ -48,16 +48,16 @@ def get_refresh_request_body(self) -> Mapping[str, Any]: """ payload: MutableMapping[str, Any] = { "grant_type": "refresh_token", - "client_id": self.client_id, - "client_secret": self.client_secret, - "refresh_token": self.refresh_token, + "client_id": self.get_client_id(), + "client_secret": self.get_client_secret(), + "refresh_token": self.get_refresh_token(), } - if self.scopes: - payload["scopes"] = self.scopes + if self.get_scopes: + payload["scopes"] = self.get_scopes() - if self.refresh_request_body: - for key, val in self.refresh_request_body.items(): + if self.get_refresh_request_body(): + for key, val in self.get_refresh_request_body().items(): # We defer to existing oauth constructs over custom configured fields if key not in payload: payload[key] = val @@ -71,61 +71,51 @@ def refresh_access_token(self) -> Tuple[str, int]: :return: a tuple of (access_token, 
token_lifespan_in_seconds) """ try: - response = requests.request(method="POST", url=self.token_refresh_endpoint, data=self.get_refresh_request_body()) + response = requests.request(method="POST", url=self.get_token_refresh_endpoint(), data=self.build_refresh_request_body()) response.raise_for_status() response_json = response.json() - return response_json[self.access_token_name], response_json[self.expires_in_name] + return response_json[self.get_access_token_name()], response_json[self.get_expires_in_name()] except Exception as e: raise Exception(f"Error while refreshing access token: {e}") from e - @property @abstractmethod - def token_refresh_endpoint(self) -> str: + def get_token_refresh_endpoint(self) -> str: """Returns the endpoint to refresh the access token""" - @property @abstractmethod - def client_id(self) -> str: + def get_client_id(self) -> str: """The client id to authenticate""" - @property @abstractmethod - def client_secret(self) -> str: + def get_client_secret(self) -> str: """The client secret to authenticate""" - @property @abstractmethod - def refresh_token(self) -> str: + def get_refresh_token(self) -> str: """The token used to refresh the access token when it expires""" - @property @abstractmethod - def scopes(self) -> List[str]: + def get_scopes(self) -> List[str]: """List of requested scopes""" - @property @abstractmethod - def token_expiry_date(self) -> pendulum.datetime: + def get_token_expiry_date(self) -> pendulum.datetime: """Expiration date of the access token""" - @token_expiry_date.setter @abstractmethod - def token_expiry_date(self, value: pendulum.datetime): + def set_token_expiry_date(self, value: pendulum.datetime): """Setter for access token expiration date""" - @property @abstractmethod - def access_token_name(self) -> str: + def get_access_token_name(self) -> str: """Field to extract access token from in the response""" - @property @abstractmethod - def expires_in_name(self): - """Setter for field to extract access token 
expiration date from in the response""" + def get_expires_in_name(self) -> str: + """Returns the expires_in field name""" - @property @abstractmethod - def refresh_request_body(self) -> Mapping[str, Any]: + def get_refresh_request_body(self) -> Mapping[str, Any]: """Returns the request body to set on the refresh request""" @property diff --git a/airbyte-cdk/python/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py b/airbyte-cdk/python/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py index ec37f436b6e75..d479652f78b8f 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py @@ -26,89 +26,47 @@ def __init__( expires_in_name: str = "expires_in", refresh_request_body: Mapping[str, Any] = None, ): - self.token_refresh_endpoint = token_refresh_endpoint - self.client_secret = client_secret - self.client_id = client_id - self.refresh_token = refresh_token - self.scopes = scopes - self.access_token_name = access_token_name - self.expires_in_name = expires_in_name - self.refresh_request_body = refresh_request_body - - self.token_expiry_date = token_expiry_date or pendulum.now().subtract(days=1) - self.access_token = None - - @property - def token_refresh_endpoint(self) -> str: + self._token_refresh_endpoint = token_refresh_endpoint + self._client_secret = client_secret + self._client_id = client_id + self._refresh_token = refresh_token + self._scopes = scopes + self._access_token_name = access_token_name + self._expires_in_name = expires_in_name + self._refresh_request_body = refresh_request_body + + self._token_expiry_date = token_expiry_date or pendulum.now().subtract(days=1) + self._access_token = None + + def get_token_refresh_endpoint(self) -> str: return self._token_refresh_endpoint - @token_refresh_endpoint.setter - def token_refresh_endpoint(self, value: str): - self._token_refresh_endpoint = value - - @property - def 
client_id(self) -> str: + def get_client_id(self) -> str: return self._client_id - @client_id.setter - def client_id(self, value: str): - self._client_id = value - - @property - def client_secret(self) -> str: + def get_client_secret(self) -> str: return self._client_secret - @client_secret.setter - def client_secret(self, value: str): - self._client_secret = value - - @property - def refresh_token(self) -> str: + def get_refresh_token(self) -> str: return self._refresh_token - @refresh_token.setter - def refresh_token(self, value: str): - self._refresh_token = value - - @property - def access_token_name(self) -> str: + def get_access_token_name(self) -> str: return self._access_token_name - @access_token_name.setter - def access_token_name(self, value: str): - self._access_token_name = value - - @property - def scopes(self) -> [str]: + def get_scopes(self) -> [str]: return self._scopes - @scopes.setter - def scopes(self, value: [str]): - self._scopes = value - - @property - def token_expiry_date(self) -> pendulum.DateTime: - return self._token_expiry_date - - @token_expiry_date.setter - def token_expiry_date(self, value: pendulum.DateTime): - self._token_expiry_date = value - - @property - def expires_in_name(self) -> str: + def get_expires_in_name(self) -> str: return self._expires_in_name - @expires_in_name.setter - def expires_in_name(self, value): - self._expires_in_name = value - - @property - def refresh_request_body(self) -> Mapping[str, Any]: + def get_refresh_request_body(self) -> Mapping[str, Any]: return self._refresh_request_body - @refresh_request_body.setter - def refresh_request_body(self, value: Mapping[str, Any]): - self._refresh_request_body = value + def get_token_expiry_date(self) -> pendulum.DateTime: + return self._token_expiry_date + + def set_token_expiry_date(self, value: pendulum.DateTime): + self._token_expiry_date = value @property def access_token(self) -> str: diff --git a/airbyte-cdk/python/reference_docs/_source/conf.py 
b/airbyte-cdk/python/reference_docs/_source/conf.py index ff5dcf2caec57..5ce9636934f6c 100644 --- a/airbyte-cdk/python/reference_docs/_source/conf.py +++ b/airbyte-cdk/python/reference_docs/_source/conf.py @@ -32,7 +32,10 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ["sphinx.ext.autodoc"] # API docs +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", # Support for NumPy and Google style docstrings +] # API docs source_suffix = {".rst": "restructuredtext", ".md": "markdown"} diff --git a/airbyte-cdk/python/setup.py b/airbyte-cdk/python/setup.py index d314389c93d56..22643e5fced3d 100644 --- a/airbyte-cdk/python/setup.py +++ b/airbyte-cdk/python/setup.py @@ -44,6 +44,7 @@ packages=find_packages(exclude=("unit_tests",)), install_requires=[ "backoff", + "dataclasses-jsonschema~=2.15.1", "dpath~=2.0.1", "jsonschema~=3.2.0", "jsonref~=0.2", diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/auth/test_oauth.py b/airbyte-cdk/python/unit_tests/sources/declarative/auth/test_oauth.py index 2d0c1d265fa8c..12cb353de5c04 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/auth/test_oauth.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/auth/test_oauth.py @@ -17,11 +17,11 @@ "refresh_endpoint": "refresh_end", "client_id": "some_client_id", "client_secret": "some_client_secret", - "refresh_token": "some_refresh_token", "token_expiry_date": pendulum.now().subtract(days=2).to_rfc3339_string(), "custom_field": "in_outbound_request", "another_field": "exists_in_body", } +options = {"refresh_token": "some_refresh_token"} class TestOauth2Authenticator: @@ -38,7 +38,7 @@ def test_refresh_request_body(self): token_refresh_endpoint="{{ config['refresh_endpoint'] }}", client_id="{{ config['client_id'] }}", client_secret="{{ config['client_secret'] }}", - refresh_token="{{ config['refresh_token'] }}", + refresh_token="{{ 
options['refresh_token'] }}", config=config, scopes=["scope1", "scope2"], token_expiry_date="{{ config['token_expiry_date'] }}", @@ -47,8 +47,9 @@ def test_refresh_request_body(self): "another_field": "{{ config['another_field'] }}", "scopes": ["no_override"], }, + options=options, ) - body = oauth.get_refresh_request_body() + body = oauth.build_refresh_request_body() expected = { "grant_type": "refresh_token", "client_id": "some_client_id", @@ -74,6 +75,7 @@ def test_refresh_access_token(self, mocker): "another_field": "{{ config['another_field'] }}", "scopes": ["no_override"], }, + options={}, ) resp.status_code = 200 @@ -81,6 +83,9 @@ def test_refresh_access_token(self, mocker): mocker.patch.object(requests, "request", side_effect=mock_request, autospec=True) token = oauth.refresh_access_token() + schem = DeclarativeOauth2Authenticator.json_schema() + print(schem) + assert ("access_token", 1000) == token diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/auth/test_token_auth.py b/airbyte-cdk/python/unit_tests/sources/declarative/auth/test_token_auth.py index b46f9eac643e0..29613a73fbe5f 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/auth/test_token_auth.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/auth/test_token_auth.py @@ -13,6 +13,7 @@ resp = Response() config = {"username": "user", "password": "password", "header": "header"} +options = {"username": "user", "password": "password", "header": "header"} @pytest.mark.parametrize( @@ -20,13 +21,14 @@ [ ("test_static_token", "test-token", "Bearer test-token"), ("test_token_from_config", "{{ config.username }}", "Bearer user"), + ("test_token_from_options", "{{ options.username }}", "Bearer user"), ], ) def test_bearer_token_authenticator(test_name, token, expected_header_value): """ Should match passed in token, no matter how many times token is retrieved. 
""" - token_auth = BearerAuthenticator(token, config) + token_auth = BearerAuthenticator(token, config, options=options) header1 = token_auth.get_auth_header() header2 = token_auth.get_auth_header() @@ -44,13 +46,14 @@ def test_bearer_token_authenticator(test_name, token, expected_header_value): [ ("test_static_creds", "user", "password", "Basic dXNlcjpwYXNzd29yZA=="), ("test_creds_from_config", "{{ config.username }}", "{{ config.password }}", "Basic dXNlcjpwYXNzd29yZA=="), + ("test_creds_from_options", "{{ options.username }}", "{{ options.password }}", "Basic dXNlcjpwYXNzd29yZA=="), ], ) def test_basic_authenticator(test_name, username, password, expected_header_value): """ Should match passed in token, no matter how many times token is retrieved. """ - token_auth = BasicHttpAuthenticator(username=username, password=password, config=config) + token_auth = BasicHttpAuthenticator(username=username, password=password, config=config, options=options) header1 = token_auth.get_auth_header() header2 = token_auth.get_auth_header() @@ -68,13 +71,14 @@ def test_basic_authenticator(test_name, username, password, expected_header_valu [ ("test_static_token", "Authorization", "test-token", "Authorization", "test-token"), ("test_token_from_config", "{{ config.header }}", "{{ config.username }}", "header", "user"), + ("test_token_from_options", "{{ options.header }}", "{{ options.username }}", "header", "user"), ], ) def test_api_key_authenticator(test_name, header, token, expected_header, expected_header_value): """ Should match passed in token, no matter how many times token is retrieved. 
""" - token_auth = ApiKeyAuthenticator(header, token, config) + token_auth = ApiKeyAuthenticator(header=header, api_token=token, config=config, options=options) header1 = token_auth.get_auth_header() header2 = token_auth.get_auth_header() diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/checks/test_check_stream.py b/airbyte-cdk/python/unit_tests/sources/declarative/checks/test_check_stream.py index 98aa2f1bdc135..827b99ab64842 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/checks/test_check_stream.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/checks/test_check_stream.py @@ -30,7 +30,7 @@ def test_check_stream(test_name, record, streams_to_check, expectation): source = MagicMock() source.streams.return_value = [stream] - check_stream = CheckStream(streams_to_check) + check_stream = CheckStream(streams_to_check, options={}) if expectation: actual = check_stream.check_connection(source, logger, config) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/datetime/test_min_max_datetime.py b/airbyte-cdk/python/unit_tests/sources/declarative/datetime/test_min_max_datetime.py index 56f0a69c3598d..f67032c02d580 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/datetime/test_min_max_datetime.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/datetime/test_min_max_datetime.py @@ -30,13 +30,17 @@ "{{ stream_state['newer'] }}", middle_date, ), + ("test_min_newer_time_from_options", "{{ config['older'] }}", "{{ options['newer'] }}", "", new_date), + ("test_max_newer_time_from_options", "{{ stream_state['newer'] }}", "", "{{ options['older'] }}", old_date), ], ) def test_min_max_datetime(test_name, date, min_date, max_date, expected_date): + print(MinMaxDatetime.json_schema()) config = {"older": old_date, "middle": middle_date} stream_state = {"newer": new_date} + options = {"newer": new_date, "older": old_date} - min_max_date = MinMaxDatetime(datetime=date, min_datetime=min_date, max_datetime=max_date) + 
min_max_date = MinMaxDatetime(datetime=date, min_datetime=min_date, max_datetime=max_date, options=options) actual_date = min_max_date.get_datetime(config, **{"stream_state": stream_state}) assert actual_date == datetime.datetime.strptime(expected_date, date_format) @@ -51,6 +55,7 @@ def test_custom_datetime_format(): datetime_format="%Y-%m-%dT%H:%M:%S", min_datetime="{{ config['older'] }}", max_datetime="{{ stream_state['newer'] }}", + options={}, ) actual_date = min_max_date.get_datetime(config, **{"stream_state": stream_state}) @@ -66,7 +71,26 @@ def test_format_is_a_number(): datetime_format="%Y%m%d", min_datetime="{{ config['older'] }}", max_datetime="{{ stream_state['newer'] }}", + options={}, ) actual_date = min_max_date.get_datetime(config, **{"stream_state": stream_state}) assert actual_date == datetime.datetime.strptime("20220101", "%Y%m%d").replace(tzinfo=datetime.timezone.utc) + + +def test_set_datetime_format(): + min_max_date = MinMaxDatetime(datetime="{{ config['middle'] }}", min_datetime="{{ config['older'] }}", options={}) + + # Retrieve datetime using the default datetime formatting + default_fmt_config = {"older": "2021-01-01T20:12:19.597854Z", "middle": "2022-01-01T20:12:19.597854Z"} + actual_date = min_max_date.get_datetime(default_fmt_config) + + assert actual_date == datetime.datetime.strptime("2022-01-01T20:12:19.597854Z", "%Y-%m-%dT%H:%M:%S.%f%z") + + # Set a different datetime format and attempt to retrieve datetime using an updated format + min_max_date.datetime_format = "%Y-%m-%dT%H:%M:%S" + + custom_fmt_config = {"older": "2021-01-01T20:12:19", "middle": "2022-01-01T20:12:19"} + actual_date = min_max_date.get_datetime(custom_fmt_config) + + assert actual_date == datetime.datetime.strptime("2022-01-01T20:12:19", "%Y-%m-%dT%H:%M:%S").replace(tzinfo=datetime.timezone.utc) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_jello.py b/airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_jello.py index 
6812e55be11ec..b9a1ec25322d8 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_jello.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_jello.py @@ -10,8 +10,9 @@ from airbyte_cdk.sources.declarative.extractors.jello import JelloExtractor config = {"field": "record_array"} +options = {"options_field": "record_array"} -decoder = JsonDecoder() +decoder = JsonDecoder(options={}) @pytest.mark.parametrize( @@ -19,6 +20,7 @@ [ ("test_extract_from_array", "_.data", {"data": [{"id": 1}, {"id": 2}]}, [{"id": 1}, {"id": 2}]), ("test_field_in_config", "_.{{ config['field'] }}", {"record_array": [{"id": 1}, {"id": 2}]}, [{"id": 1}, {"id": 2}]), + ("test_field_in_options", "_.{{ options['options_field'] }}", {"record_array": [{"id": 1}, {"id": 2}]}, [{"id": 1}, {"id": 2}]), ("test_default", "_{{kwargs['field']}}", [{"id": 1}, {"id": 2}], [{"id": 1}, {"id": 2}]), ( "test_remove_fields_from_records", @@ -38,7 +40,7 @@ ], ) def test(test_name, transform, body, expected_records): - extractor = JelloExtractor(transform, config, decoder) + extractor = JelloExtractor(transform=transform, config=config, decoder=decoder, options=options) response = create_response(body) actual_records = extractor.extract_records(response) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_record_filter.py b/airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_record_filter.py index 2b180ee5d935f..e58db11ada566 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_record_filter.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_record_filter.py @@ -33,14 +33,21 @@ [{"id": 11}, {"id": 12}, {"id": 13}, {"id": 14}, {"id": 15}], [], ), + ( + "test_using_options_filter", + "{{ record['created_at'] > options['created_at'] }}", + [{"id": 1, "created_at": "06-06-21"}, {"id": 2, "created_at": "06-07-21"}, {"id": 3, "created_at": "06-08-21"}], + [{"id": 3, "created_at": 
"06-08-21"}], + ), ], ) def test_record_filter(test_name, filter_template, records, expected_records): config = {"response_override": "stop_if_you_see_me"} + options = {"created_at": "06-07-21"} stream_state = {"created_at": "06-06-21"} stream_slice = {"last_seen": "06-10-21"} next_page_token = {"last_seen_id": 14} - record_filter = RecordFilter(config=config, condition=filter_template) + record_filter = RecordFilter(config=config, condition=filter_template, options=options) actual_records = record_filter.filter_records( records, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_record_selector.py b/airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_record_selector.py index 0367d7d34a18d..ed7aa35e6245e 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_record_selector.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_record_selector.py @@ -29,22 +29,30 @@ {"data": [{"id": 1, "created_at": "06-06-21"}, {"id": 2, "created_at": "06-07-21"}]}, [{"id": 1, "created_at": "06-06-21"}, {"id": 2, "created_at": "06-07-21"}], ), + ( + "test_with_extractor_and_filter_with_options", + "_.{{ options['options_field'] }}", + "{{ record['created_at'] > options['created_at'] }}", + {"data": [{"id": 1, "created_at": "06-06-21"}, {"id": 2, "created_at": "06-07-21"}, {"id": 3, "created_at": "06-08-21"}]}, + [{"id": 3, "created_at": "06-08-21"}], + ), ], ) def test_record_filter(test_name, transform_template, filter_template, body, expected_records): config = {"response_override": "stop_if_you_see_me"} + options = {"options_field": "data", "created_at": "06-07-21"} stream_state = {"created_at": "06-06-21"} stream_slice = {"last_seen": "06-10-21"} next_page_token = {"last_seen_id": 14} response = create_response(body) - decoder = JsonDecoder() - extractor = JelloExtractor(transform=transform_template, 
decoder=decoder, config=config) + decoder = JsonDecoder(options={}) + extractor = JelloExtractor(transform=transform_template, decoder=decoder, config=config, options=options) if filter_template is None: record_filter = None else: - record_filter = RecordFilter(config=config, condition=filter_template) - record_selector = RecordSelector(extractor=extractor, record_filter=record_filter) + record_filter = RecordFilter(config=config, condition=filter_template, options=options) + record_selector = RecordSelector(extractor=extractor, record_filter=record_filter, options=options) actual_records = record_selector.select_records( response=response, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py b/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py index adc832a7bdd57..244d041846cbd 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_interpolated_boolean.py @@ -19,7 +19,6 @@ @pytest.mark.parametrize( "test_name, template, expected_result", [ - ("test_static_condition", True, True), ("test_interpolated_true_value", "{{ config['parent']['key_with_true'] }}", True), ("test_interpolated_true_comparison", "{{ config['string_key'] == \"compare_me\" }}", True), ("test_interpolated_false_condition", "{{ config['string_key'] == \"witness_me\" }}", False), @@ -34,8 +33,9 @@ ("test_True", "{{ True }}", True), ("test_value_in_array", "{{ 1 in config['non_empty_array'] }}", True), ("test_value_not_in_array", "{{ 2 in config['non_empty_array'] }}", False), + ("test_interpolation_using_options", "{{ options['from_options'] == \"come_find_me\" }}", True), ], ) def test_interpolated_boolean(test_name, template, expected_result): - interpolated_bool = InterpolatedBoolean(template) + 
interpolated_bool = InterpolatedBoolean(condition=template, options={"from_options": "come_find_me"}) assert interpolated_bool.eval(config) == expected_result diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py b/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py index 9413c79caaf86..8491cc6b9086b 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_interpolated_mapping.py @@ -28,7 +28,7 @@ def test(test_name, key, expected_value): } config = {"c": "VALUE_FROM_CONFIG"} kwargs = {"a": "VALUE_FROM_KWARGS"} - mapping = InterpolatedMapping(d, options={"b": "VALUE_FROM_OPTIONS", "k": "key"}) + mapping = InterpolatedMapping(mapping=d, options={"b": "VALUE_FROM_OPTIONS", "k": "key"}) interpolated = mapping.eval(config, **{"kwargs": kwargs}) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_interpolated_string.py b/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_interpolated_string.py index b66d13ccc965a..089174c82f52c 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_interpolated_string.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_interpolated_string.py @@ -17,6 +17,7 @@ ("test_eval_from_options", "{{ options['hello'] }}", "world"), ("test_eval_from_config", "{{ config['field'] }}", "value"), ("test_eval_from_kwargs", "{{ kwargs['c'] }}", "airbyte"), + ("test_eval_from_kwargs", "{{ kwargs['c'] }}", "airbyte"), ], ) def test_interpolated_string(test_name, input_string, expected_value): diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/iterators/test_only_once.py b/airbyte-cdk/python/unit_tests/sources/declarative/iterators/test_only_once.py index df58c545e56ab..d51ca23b04e37 100644 --- 
a/airbyte-cdk/python/unit_tests/sources/declarative/iterators/test_only_once.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/iterators/test_only_once.py @@ -7,7 +7,7 @@ def test(): - iterator = SingleSlice() + iterator = SingleSlice(options={}) stream_slices = iterator.stream_slices(SyncMode.incremental, None) assert stream_slices == [dict()] diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py index 71518b09980d4..d60a862770afb 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/error_handlers/backoff_strategies/test_exponential_backoff.py @@ -22,3 +22,10 @@ def test_exponential_backoff(test_name, attempt_count, expected_backoff_time): backoff_strategy = ExponentialBackoffStrategy(factor=5) backoff = backoff_strategy.backoff(response_mock, attempt_count) assert backoff == expected_backoff_time + + +def test_exponential_backoff_default(): + response_mock = MagicMock() + backoff_strategy = ExponentialBackoffStrategy() + backoff = backoff_strategy.backoff(response_mock, 3) + assert backoff == 40 diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py index 74fa5a30dc020..27b47f97368cc 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/error_handlers/test_composite_error_handler.py @@ -96,7 +96,7 @@ def test_composite_error_handler(test_name, first_handler_behavior, second_handl 
second_error_handler.should_retry.return_value = second_handler_behavior second_error_handler.should_retry.return_value = second_handler_behavior retriers = [first_error_handler, second_error_handler] - retrier = CompositeErrorHandler(retriers) + retrier = CompositeErrorHandler(error_handlers=retriers, options={}) response_mock = MagicMock() response_mock.ok = first_handler_behavior == response_status.SUCCESS or second_handler_behavior == response_status.SUCCESS assert retrier.should_retry(response_mock) == expected_behavior @@ -104,7 +104,7 @@ def test_composite_error_handler(test_name, first_handler_behavior, second_handl def test_composite_error_handler_no_handlers(): try: - CompositeErrorHandler([]) + CompositeErrorHandler(error_handlers=[], options={}) assert False except ValueError: pass diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py index b7167f7dfcd81..091fc0293bf0b 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/error_handlers/test_default_error_handler.py @@ -59,7 +59,7 @@ "test_403_ignore_error_message", HTTPStatus.FORBIDDEN, None, - HttpResponseFilter(action=ResponseAction.IGNORE, error_message_contain="found"), + HttpResponseFilter(action=ResponseAction.IGNORE, error_message_contains="found", options={}), {}, response_status.IGNORE, None, @@ -68,7 +68,7 @@ "test_403_dont_ignore_error_message", HTTPStatus.FORBIDDEN, None, - HttpResponseFilter(action=ResponseAction.IGNORE, error_message_contain="not_found"), + HttpResponseFilter(action=ResponseAction.IGNORE, error_message_contains="not_found", options={}), {}, response_status.FAIL, None, @@ -78,7 +78,7 @@ "test_ignore_403", HTTPStatus.FORBIDDEN, None, - 
HttpResponseFilter(action=ResponseAction.IGNORE, http_codes={HTTPStatus.FORBIDDEN}), + HttpResponseFilter(action=ResponseAction.IGNORE, http_codes={HTTPStatus.FORBIDDEN}, options={}), {}, response_status.IGNORE, None, @@ -86,7 +86,7 @@ ( "test_403_with_predicate", HTTPStatus.FORBIDDEN, - HttpResponseFilter(action=ResponseAction.RETRY, predicate="{{ 'code' in response }}"), + HttpResponseFilter(action=ResponseAction.RETRY, predicate="{{ 'code' in response }}", options={}), None, {}, ResponseStatus.retry(10), @@ -95,7 +95,7 @@ ( "test_403_with_predicate", HTTPStatus.FORBIDDEN, - HttpResponseFilter(action=ResponseAction.RETRY, predicate="{{ 'some_absent_field' in response }}"), + HttpResponseFilter(action=ResponseAction.RETRY, predicate="{{ 'some_absent_field' in response }}", options={}), None, {}, response_status.FAIL, @@ -104,7 +104,7 @@ ( "test_200_fail_with_predicate", HTTPStatus.OK, - HttpResponseFilter(action=ResponseAction.FAIL, error_message_contain="found"), + HttpResponseFilter(action=ResponseAction.FAIL, error_message_contains="found", options={}), None, {}, response_status.FAIL, @@ -113,7 +113,7 @@ ( "test_retry_403", HTTPStatus.FORBIDDEN, - HttpResponseFilter(action=ResponseAction.RETRY, http_codes={HTTPStatus.FORBIDDEN}), + HttpResponseFilter(action=ResponseAction.RETRY, http_codes={HTTPStatus.FORBIDDEN}, options={}), None, {}, ResponseStatus.retry(10), @@ -127,7 +127,7 @@ def test_default_error_handler( response_mock = create_response(http_code, headers=response_headers, json_body={"code": "1000", "error": "found"}) response_mock.ok = http_code < 400 response_filters = [f for f in [retry_response_filter, ignore_response_filter] if f] - error_handler = DefaultErrorHandler(response_filters=response_filters, backoff_strategies=backoff_strategy) + error_handler = DefaultErrorHandler(response_filters=response_filters, backoff_strategies=backoff_strategy, options={}) actual_should_retry = error_handler.should_retry(response_mock) assert actual_should_retry 
== should_retry if should_retry.action == ResponseAction.RETRY: @@ -137,7 +137,7 @@ def test_default_error_handler( def test_default_error_handler_attempt_count_increases(): status_code = 500 response_mock = create_response(status_code) - error_handler = DefaultErrorHandler() + error_handler = DefaultErrorHandler(options={}) actual_should_retry = error_handler.should_retry(response_mock) assert actual_should_retry == ResponseStatus.retry(10) assert actual_should_retry.retry_in == 10 diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py index 73e1f0f5c36ce..0299bd5873414 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_cursor_pagination_strategy.py @@ -18,15 +18,19 @@ ("test_token_from_config", "{{ config.config_key }}", None, "config_value"), ("test_token_from_last_record", "{{ last_records[-1].id }}", None, 1), ("test_token_from_response", "{{ response._metadata.content }}", None, "content_value"), + ("test_token_from_options", "{{ options.key }}", None, "value"), ("test_token_not_found", "{{ response.invalid_key }}", None, None), - ("test_static_token_with_stop_condition_false", "token", InterpolatedBoolean("{{False}}"), "token"), - ("test_static_token_with_stop_condition_true", "token", InterpolatedBoolean("{{True}}"), None), + ("test_static_token_with_stop_condition_false", "token", InterpolatedBoolean(condition="{{False}}", options={}), "token"), + ("test_static_token_with_stop_condition_true", "token", InterpolatedBoolean(condition="{{True}}", options={}), None), ], ) def test_cursor_pagination_strategy(test_name, template_string, stop_condition, expected_token): - decoder = JsonDecoder() + decoder = JsonDecoder(options={}) config = 
{"config_key": "config_value"} - strategy = CursorPaginationStrategy(template_string, config, stop_condition, decoder) + options = {"key": "value"} + strategy = CursorPaginationStrategy( + cursor_value=template_string, config=config, stop_condition=stop_condition, decoder=decoder, options=options + ) response = requests.Response() response.headers = {"has_more": True} diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_limit_paginator.py b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_limit_paginator.py index 585da5eccd94a..cbdae4e48531a 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_limit_paginator.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_limit_paginator.py @@ -17,7 +17,7 @@ [ ( "test_limit_paginator_path", - RequestOption(inject_into=RequestOptionType.path), + RequestOption(inject_into=RequestOptionType.path, options={}), None, "/next_url", {"limit": 2}, @@ -29,7 +29,7 @@ ), ( "test_limit_paginator_request_param", - RequestOption(inject_into=RequestOptionType.request_parameter, field_name="from"), + RequestOption(inject_into=RequestOptionType.request_parameter, field_name="from", options={}), None, None, {"limit": 2, "from": "https://airbyte.io/next_url"}, @@ -41,8 +41,8 @@ ), ( "test_limit_paginator_no_token", - RequestOption(inject_into=RequestOptionType.request_parameter, field_name="from"), - InterpolatedBoolean("{{True}}"), + RequestOption(inject_into=RequestOptionType.request_parameter, field_name="from", options={}), + InterpolatedBoolean(condition="{{True}}", options={}), None, {"limit": 2}, {}, @@ -53,7 +53,7 @@ ), ( "test_limit_paginator_cursor_header", - RequestOption(inject_into=RequestOptionType.header, field_name="from"), + RequestOption(inject_into=RequestOptionType.header, field_name="from", options={}), None, None, {"limit": 2}, @@ -65,7 +65,7 @@ ), ( "test_limit_paginator_cursor_body_data", 
- RequestOption(inject_into=RequestOptionType.body_data, field_name="from"), + RequestOption(inject_into=RequestOptionType.body_data, field_name="from", options={}), None, None, {"limit": 2}, @@ -77,7 +77,7 @@ ), ( "test_limit_paginator_cursor_body_json", - RequestOption(inject_into=RequestOptionType.body_json, field_name="from"), + RequestOption(inject_into=RequestOptionType.body_json, field_name="from", options={}), None, None, {"limit": 2}, @@ -101,12 +101,23 @@ def test_limit_paginator( last_records, expected_next_page_token, ): - limit_request_option = RequestOption(inject_into=RequestOptionType.request_parameter, field_name="limit") + limit_request_option = RequestOption(inject_into=RequestOptionType.request_parameter, field_name="limit", options={}) cursor_value = "{{ response.next }}" url_base = "https://airbyte.io" config = {} - strategy = CursorPaginationStrategy(cursor_value, stop_condition=stop_condition, decoder=JsonDecoder(), config=config) - paginator = LimitPaginator(2, limit_request_option, page_token_request_option, strategy, config, url_base) + options = {} + strategy = CursorPaginationStrategy( + cursor_value=cursor_value, stop_condition=stop_condition, decoder=JsonDecoder(options={}), config=config, options=options + ) + paginator = LimitPaginator( + page_size=2, + limit_option=limit_request_option, + page_token_option=page_token_request_option, + pagination_strategy=strategy, + config=config, + url_base=url_base, + options={}, + ) response = requests.Response() response.headers = {"A_HEADER": "HEADER_VALUE"} @@ -115,10 +126,10 @@ def test_limit_paginator( actual_next_page_token = paginator.next_page_token(response, last_records) actual_next_path = paginator.path() - actual_request_params = paginator.request_params() - actual_headers = paginator.request_headers() - actual_body_data = paginator.request_body_data() - actual_body_json = paginator.request_body_json() + actual_request_params = paginator.get_request_params() + actual_headers = 
paginator.get_request_headers() + actual_body_data = paginator.get_request_body_data() + actual_body_json = paginator.get_request_body_json() assert actual_next_page_token == expected_next_page_token assert actual_next_path == expected_updated_path assert actual_request_params == expected_request_params @@ -128,14 +139,23 @@ def test_limit_paginator( def test_limit_cannot_be_set_in_path(): - limit_request_option = RequestOption(inject_into=RequestOptionType.path) - page_token_request_option = RequestOption(inject_into=RequestOptionType.request_parameter, field_name="offset") + limit_request_option = RequestOption(inject_into=RequestOptionType.path, options={}) + page_token_request_option = RequestOption(inject_into=RequestOptionType.request_parameter, field_name="offset", options={}) cursor_value = "{{ response.next }}" url_base = "https://airbyte.io" config = {} - strategy = CursorPaginationStrategy(cursor_value, config) + options = {} + strategy = CursorPaginationStrategy(cursor_value=cursor_value, config=config, options=options) try: - LimitPaginator(2, limit_request_option, page_token_request_option, strategy, config, url_base) + LimitPaginator( + page_size=2, + limit_option=limit_request_option, + page_token_option=page_token_request_option, + pagination_strategy=strategy, + config=config, + url_base=url_base, + options={}, + ) assert False except ValueError: pass diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py index b9fcd7af21fc3..637bebb8f910e 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_no_paginator.py @@ -7,6 +7,6 @@ def test(): - paginator = NoPagination() + paginator = NoPagination(options={}) next_page_token = paginator.next_page_token(requests.Response(), []) 
assert next_page_token == {} diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py index 866ae756427e0..7376ef155b43a 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py @@ -17,7 +17,7 @@ ], ) def test_offset_increment_paginator_strategy(test_name, page_size, expected_next_page_token, expected_offset): - paginator_strategy = OffsetIncrement(page_size) + paginator_strategy = OffsetIncrement(page_size, options={}) assert paginator_strategy._offset == 0 response = requests.Response() diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_page_increment.py b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_page_increment.py index 7d50dfb105aaa..fa3808a916b07 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_page_increment.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_page_increment.py @@ -17,7 +17,7 @@ ], ) def test_page_increment_paginator_strategy(test_name, page_size, expected_next_page_token, expected_offset): - paginator_strategy = PageIncrement(page_size) + paginator_strategy = PageIncrement(page_size, options={}) assert paginator_strategy._offset == 0 response = requests.Response() diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_request_option.py b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_request_option.py index 0ccedc6b4d14c..c54be6223be82 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_request_option.py +++ 
b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_request_option.py @@ -23,11 +23,11 @@ ) def test_request_option(test_name, option_type, field_name, should_raise): try: - request_option = RequestOption(inject_into=option_type, field_name=field_name) + request_option = RequestOption(inject_into=option_type, field_name=field_name, options={}) if should_raise: assert False - assert request_option._field_name == field_name - assert request_option._option_type == option_type + assert request_option.field_name == field_name + assert request_option.inject_into == option_type except ValueError: if not should_raise: assert False diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py index 65f458fecaf33..457ddc9a22d8b 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py @@ -30,9 +30,9 @@ ], ) def test_interpolated_request_params(test_name, input_request_params, expected_request_params): - provider = InterpolatedRequestOptionsProvider(config=config, request_parameters=input_request_params) + provider = InterpolatedRequestOptionsProvider(config=config, request_parameters=input_request_params, options={}) - actual_request_params = provider.request_params(stream_state=state, stream_slice=stream_slice, next_page_token=next_page_token) + actual_request_params = provider.get_request_params(stream_state=state, stream_slice=stream_slice, next_page_token=next_page_token) assert actual_request_params == expected_request_params @@ -54,9 +54,9 @@ def test_interpolated_request_params(test_name, input_request_params, expected_r ], ) def 
test_interpolated_request_json(test_name, input_request_json, expected_request_json): - provider = InterpolatedRequestOptionsProvider(config=config, request_body_json=input_request_json) + provider = InterpolatedRequestOptionsProvider(config=config, request_body_json=input_request_json, options={}) - actual_request_json = provider.request_body_json(stream_state=state, stream_slice=stream_slice, next_page_token=next_page_token) + actual_request_json = provider.get_request_body_json(stream_state=state, stream_slice=stream_slice, next_page_token=next_page_token) assert actual_request_json == expected_request_json @@ -72,9 +72,9 @@ def test_interpolated_request_json(test_name, input_request_json, expected_reque ], ) def test_interpolated_request_data(test_name, input_request_data, expected_request_data): - provider = InterpolatedRequestOptionsProvider(config=config, request_body_data=input_request_data) + provider = InterpolatedRequestOptionsProvider(config=config, request_body_data=input_request_data, options={}) - actual_request_data = provider.request_body_data(stream_state=state, stream_slice=stream_slice, next_page_token=next_page_token) + actual_request_data = provider.get_request_body_data(stream_state=state, stream_slice=stream_slice, next_page_token=next_page_token) assert actual_request_data == expected_request_data @@ -83,4 +83,4 @@ def test_error_on_create_for_both_request_json_and_data(): request_json = {"body_key": "{{ stream_slice['start_date'] }}"} request_data = "interpolate_me=5&invalid={{ config['option'] }}" with pytest.raises(ValueError): - InterpolatedRequestOptionsProvider(config=config, request_body_json=request_json, request_body_data=request_data) + InterpolatedRequestOptionsProvider(config=config, request_body_json=request_json, request_body_data=request_data, options={}) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py 
b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py index 0b5aabda8ca86..0a6c6b3d72c1b 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py @@ -16,13 +16,13 @@ def test_http_requester(): request_params = {"param": "value"} request_body_data = "body_key_1=value_1&body_key_2=value2" request_body_json = {"body_field": "body_value"} - request_options_provider.request_params.return_value = request_params - request_options_provider.request_body_data.return_value = request_body_data - request_options_provider.request_body_json.return_value = request_body_json + request_options_provider.get_request_params.return_value = request_params + request_options_provider.get_request_body_data.return_value = request_body_data + request_options_provider.get_request_body_json.return_value = request_body_json request_headers_provider = MagicMock() request_headers = {"header": "value"} - request_headers_provider.request_headers.return_value = request_headers + request_headers_provider.get_request_headers.return_value = request_headers authenticator = MagicMock() @@ -48,14 +48,15 @@ def test_http_requester(): authenticator=authenticator, error_handler=error_handler, config=config, + options={}, ) assert requester.get_url_base() == "https://airbyte.io" assert requester.get_path(stream_state={}, stream_slice=stream_slice, next_page_token={}) == "v1/1234" assert requester.get_authenticator() == authenticator assert requester.get_method() == HttpMethod.GET - assert requester.request_params(stream_state={}, stream_slice=None, next_page_token=None) == request_params - assert requester.request_body_data(stream_state={}, stream_slice=None, next_page_token=None) == request_body_data - assert requester.request_body_json(stream_state={}, stream_slice=None, next_page_token=None) == request_body_json + assert 
requester.get_request_params(stream_state={}, stream_slice=None, next_page_token=None) == request_params + assert requester.get_request_body_data(stream_state={}, stream_slice=None, next_page_token=None) == request_body_data + assert requester.get_request_body_json(stream_state={}, stream_slice=None, next_page_token=None) == request_body_json assert requester.should_retry(requests.Response()) == should_retry assert {} == requester.request_kwargs(stream_state={}, stream_slice=None, next_page_token=None) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py index 3362485465460..74ee47267c35e 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_interpolated_request_input_provider.py @@ -4,7 +4,7 @@ import pytest as pytest from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping -from airbyte_cdk.sources.declarative.requesters.interpolated_request_input_provider import InterpolatedRequestInputProvider +from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_request_input_provider import InterpolatedRequestInputProvider @pytest.mark.parametrize( diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py index aa3b1ee215e6f..ad1ce696acb5a 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py @@ -14,6 +14,7 @@ from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType from 
airbyte_cdk.sources.declarative.requesters.requester import HttpMethod from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever +from airbyte_cdk.sources.streams.http.auth import NoAuth primary_key = "pk" records = [{"id": 1}, {"id": 2}] @@ -23,7 +24,7 @@ def test_simple_retriever_full(): requester = MagicMock() request_params = {"param": "value"} - requester.request_params.return_value = request_params + requester.get_request_params.return_value = request_params paginator = MagicMock() next_page_token = {"cursor": "cursor_value"} @@ -42,6 +43,7 @@ def test_simple_retriever_full(): underlying_state = {"date": "2021-01-01"} iterator.get_stream_state.return_value = underlying_state + requester.get_authenticator.return_value = NoAuth url_base = "https://airbyte.io" requester.get_url_base.return_value = url_base path = "/v1" @@ -52,9 +54,9 @@ def test_simple_retriever_full(): should_retry = ResponseStatus.retry(backoff_time) requester.should_retry.return_value = should_retry request_body_data = {"body": "data"} - requester.request_body_data.return_value = request_body_data + requester.get_request_body_data.return_value = request_body_data request_body_json = {"body": "json"} - requester.request_body_json.return_value = request_body_json + requester.get_request_body_json.return_value = request_body_json request_kwargs = {"kwarg": "value"} requester.request_kwargs.return_value = request_kwargs cache_filename = "cache" @@ -63,12 +65,13 @@ def test_simple_retriever_full(): requester.use_cache = use_cache retriever = SimpleRetriever( - "stream_name", - primary_key, + name="stream_name", + primary_key=primary_key, requester=requester, paginator=paginator, record_selector=record_selector, stream_slicer=iterator, + options={}, ) assert retriever.primary_key == primary_key @@ -106,7 +109,7 @@ def test_simple_retriever_full(): ) def test_should_retry(test_name, requester_response, expected_should_retry, expected_backoff_time): requester = 
MagicMock() - retriever = SimpleRetriever("stream_name", primary_key, requester=requester, record_selector=MagicMock()) + retriever = SimpleRetriever(name="stream_name", primary_key=primary_key, requester=requester, record_selector=MagicMock(), options={}) requester.should_retry.return_value = requester_response assert retriever.should_retry(requests.Response()) == expected_should_retry if requester_response.action == ResponseAction.RETRY: @@ -125,7 +128,9 @@ def test_parse_response(test_name, status_code, response_status, len_expected_re requester = MagicMock() record_selector = MagicMock() record_selector.select_records.return_value = [{"id": 100}] - retriever = SimpleRetriever("stream_name", primary_key, requester=requester, record_selector=record_selector) + retriever = SimpleRetriever( + name="stream_name", primary_key=primary_key, requester=requester, record_selector=record_selector, options={} + ) response = requests.Response() response.status_code = status_code requester.should_retry.return_value = response_status @@ -154,7 +159,9 @@ def test_backoff_time(test_name, response_action, retry_in, expected_backoff_tim record_selector = MagicMock() record_selector.select_records.return_value = [{"id": 100}] response = requests.Response() - retriever = SimpleRetriever("stream_name", primary_key, requester=requester, record_selector=record_selector) + retriever = SimpleRetriever( + name="stream_name", primary_key=primary_key, requester=requester, record_selector=record_selector, options={} + ) if expected_backoff_time: requester.should_retry.return_value = ResponseStatus(response_action, retry_in) actual_backoff_time = retriever.backoff_time(response) @@ -180,27 +187,33 @@ def test_backoff_time(test_name, response_action, retry_in, expected_backoff_tim ) def test_get_request_options_from_pagination(test_name, paginator_mapping, stream_slicer_mapping, expected_mapping): paginator = MagicMock() - paginator.request_headers.return_value = paginator_mapping - 
paginator.request_params.return_value = paginator_mapping - paginator.request_body_data.return_value = paginator_mapping - paginator.request_body_json.return_value = paginator_mapping + paginator.get_request_headers.return_value = paginator_mapping + paginator.get_request_params.return_value = paginator_mapping + paginator.get_request_body_data.return_value = paginator_mapping + paginator.get_request_body_json.return_value = paginator_mapping stream_slicer = MagicMock() - stream_slicer.request_headers.return_value = stream_slicer_mapping - stream_slicer.request_params.return_value = stream_slicer_mapping - stream_slicer.request_body_data.return_value = stream_slicer_mapping - stream_slicer.request_body_json.return_value = stream_slicer_mapping + stream_slicer.get_request_headers.return_value = stream_slicer_mapping + stream_slicer.get_request_params.return_value = stream_slicer_mapping + stream_slicer.get_request_body_data.return_value = stream_slicer_mapping + stream_slicer.get_request_body_json.return_value = stream_slicer_mapping base_mapping = {"key": "value"} requester = MagicMock() - requester.request_headers.return_value = base_mapping - requester.request_params.return_value = base_mapping - requester.request_body_data.return_value = base_mapping - requester.request_body_json.return_value = base_mapping + requester.get_request_headers.return_value = base_mapping + requester.get_request_params.return_value = base_mapping + requester.get_request_body_data.return_value = base_mapping + requester.get_request_body_json.return_value = base_mapping record_selector = MagicMock() retriever = SimpleRetriever( - "stream_name", primary_key, requester=requester, record_selector=record_selector, paginator=paginator, stream_slicer=stream_slicer + name="stream_name", + primary_key=primary_key, + requester=requester, + record_selector=record_selector, + paginator=paginator, + stream_slicer=stream_slicer, + options={}, ) request_option_type_to_method = { @@ -234,13 +247,20 @@ 
def test_get_request_options_from_pagination(test_name, paginator_mapping, strea ) def test_request_body_data(test_name, requester_body_data, paginator_body_data, expected_body_data): paginator = MagicMock() - paginator.request_body_data.return_value = paginator_body_data + paginator.get_request_body_data.return_value = paginator_body_data requester = MagicMock() - requester.request_body_data.return_value = requester_body_data + requester.get_request_body_data.return_value = requester_body_data record_selector = MagicMock() - retriever = SimpleRetriever("stream_name", primary_key, requester=requester, record_selector=record_selector, paginator=paginator) + retriever = SimpleRetriever( + name="stream_name", + primary_key=primary_key, + requester=requester, + record_selector=record_selector, + paginator=paginator, + options={}, + ) if expected_body_data: actual_body_data = retriever.request_body_data(None, None, None) @@ -268,7 +288,14 @@ def test_path(test_name, requester_path, paginator_path, expected_path): requester.get_path.return_value = requester_path record_selector = MagicMock() - retriever = SimpleRetriever("stream_name", primary_key, requester=requester, record_selector=record_selector, paginator=paginator) + retriever = SimpleRetriever( + name="stream_name", + primary_key=primary_key, + requester=requester, + record_selector=record_selector, + paginator=paginator, + options={}, + ) actual_path = retriever.path(stream_state=None, stream_slice=None, next_page_token=None) assert expected_path == actual_path diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py index 28563fbdaa9d4..3ed21485c3c0a 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +++ 
b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py @@ -17,14 +17,14 @@ [ ( "test_single_stream_slicer", - [ListStreamSlicer(["customer", "store", "subscription"], "owner_resource", None)], + [ListStreamSlicer(slice_values=["customer", "store", "subscription"], cursor_field="owner_resource", config={}, options={})], [{"owner_resource": "customer"}, {"owner_resource": "store"}, {"owner_resource": "subscription"}], ), ( "test_two_stream_slicers", [ - ListStreamSlicer(["customer", "store", "subscription"], "owner_resource", None), - ListStreamSlicer(["A", "B"], "letter", None), + ListStreamSlicer(slice_values=["customer", "store", "subscription"], cursor_field="owner_resource", config={}, options={}), + ListStreamSlicer(slice_values=["A", "B"], cursor_field="letter", config={}, options={}), ], [ {"owner_resource": "customer", "letter": "A"}, @@ -38,14 +38,15 @@ ( "test_list_and_datetime", [ - ListStreamSlicer(["customer", "store", "subscription"], "owner_resource", None), + ListStreamSlicer(slice_values=["customer", "store", "subscription"], cursor_field="owner_resource", config={}, options={}), DatetimeStreamSlicer( - MinMaxDatetime(datetime="2021-01-01", datetime_format="%Y-%m-%d"), - MinMaxDatetime(datetime="2021-01-03", datetime_format="%Y-%m-%d"), - "1d", - InterpolatedString.create("", options={}), - "%Y-%m-%d", - None, + start_datetime=MinMaxDatetime(datetime="2021-01-01", datetime_format="%Y-%m-%d", options={}), + end_datetime=MinMaxDatetime(datetime="2021-01-03", datetime_format="%Y-%m-%d", options={}), + step="1d", + cursor_field=InterpolatedString.create("", options={}), + datetime_format="%Y-%m-%d", + config={}, + options={}, ), ], [ @@ -63,7 +64,7 @@ ], ) def test_substream_slicer(test_name, stream_slicers, expected_slices): - slicer = CartesianProductStreamSlicer(stream_slicers) + slicer = CartesianProductStreamSlicer(stream_slicers=stream_slicers, options={}) slices = [s for s in 
slicer.stream_slices(SyncMode.incremental, stream_state=None)] assert slices == expected_slices @@ -82,17 +83,18 @@ def test_substream_slicer(test_name, stream_slicers, expected_slices): ) def test_update_cursor(test_name, stream_slice, expected_state): stream_slicers = [ - ListStreamSlicer(["customer", "store", "subscription"], "owner_resource", None), + ListStreamSlicer(slice_values=["customer", "store", "subscription"], cursor_field="owner_resource", config={}, options={}), DatetimeStreamSlicer( - MinMaxDatetime(datetime="2021-01-01", datetime_format="%Y-%m-%d"), - MinMaxDatetime(datetime="2021-01-03", datetime_format="%Y-%m-%d"), - "1d", - InterpolatedString("date"), - "%Y-%m-%d", - None, + start_datetime=MinMaxDatetime(datetime="2021-01-01", datetime_format="%Y-%m-%d", options={}), + end_datetime=MinMaxDatetime(datetime="2021-01-03", datetime_format="%Y-%m-%d", options={}), + step="1d", + cursor_field=InterpolatedString(string="date", options={}), + datetime_format="%Y-%m-%d", + config={}, + options={}, ), ] - slicer = CartesianProductStreamSlicer(stream_slicers) + slicer = CartesianProductStreamSlicer(stream_slicers=stream_slicers, options={}) slicer.update_cursor(stream_slice, None) updated_state = slicer.get_stream_state() assert expected_state == updated_state @@ -103,8 +105,8 @@ def test_update_cursor(test_name, stream_slice, expected_state): [ ( "test_param_header", - RequestOption(RequestOptionType.request_parameter, "owner"), - RequestOption(RequestOptionType.header, "repo"), + RequestOption(inject_into=RequestOptionType.request_parameter, options={}, field_name="owner"), + RequestOption(inject_into=RequestOptionType.header, options={}, field_name="repo"), {"owner": "customer"}, {"repo": "airbyte"}, {}, @@ -112,8 +114,8 @@ def test_update_cursor(test_name, stream_slice, expected_state): ), ( "test_header_header", - RequestOption(RequestOptionType.header, "owner"), - RequestOption(RequestOptionType.header, "repo"), + 
RequestOption(inject_into=RequestOptionType.header, options={}, field_name="owner"), + RequestOption(inject_into=RequestOptionType.header, options={}, field_name="repo"), {}, {"owner": "customer", "repo": "airbyte"}, {}, @@ -121,8 +123,8 @@ def test_update_cursor(test_name, stream_slice, expected_state): ), ( "test_body_data", - RequestOption(RequestOptionType.body_data, "owner"), - RequestOption(RequestOptionType.body_data, "repo"), + RequestOption(inject_into=RequestOptionType.body_data, options={}, field_name="owner"), + RequestOption(inject_into=RequestOptionType.body_data, options={}, field_name="repo"), {}, {}, {}, @@ -130,8 +132,8 @@ def test_update_cursor(test_name, stream_slice, expected_state): ), ( "test_body_json", - RequestOption(RequestOptionType.body_json, "owner"), - RequestOption(RequestOptionType.body_json, "repo"), + RequestOption(inject_into=RequestOptionType.body_json, options={}, field_name="owner"), + RequestOption(inject_into=RequestOptionType.body_json, options={}, field_name="repo"), {}, {}, {"owner": "customer", "repo": "airbyte"}, @@ -149,14 +151,27 @@ def test_request_option( expected_body_data, ): slicer = CartesianProductStreamSlicer( - [ - ListStreamSlicer(["customer", "store", "subscription"], "owner_resource", None, stream_1_request_option), - ListStreamSlicer(["airbyte", "airbyte-cloud"], "repository", None, stream_2_request_option), - ] + stream_slicers=[ + ListStreamSlicer( + slice_values=["customer", "store", "subscription"], + cursor_field="owner_resource", + config={}, + request_option=stream_1_request_option, + options={}, + ), + ListStreamSlicer( + slice_values=["airbyte", "airbyte-cloud"], + cursor_field="repository", + config={}, + request_option=stream_2_request_option, + options={}, + ), + ], + options={}, ) slicer.update_cursor({"owner_resource": "customer", "repository": "airbyte"}, None) - assert expected_req_params == slicer.request_params() - assert expected_headers == slicer.request_headers() - assert 
expected_body_json == slicer.request_body_json() - assert expected_body_data == slicer.request_body_data() + assert expected_req_params == slicer.get_request_params() + assert expected_headers == slicer.get_request_headers() + assert expected_body_json == slicer.get_request_body_json() + assert expected_body_data == slicer.get_request_body_data() diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py index a8c8cd2874621..df1aa811ce11f 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py @@ -16,9 +16,7 @@ FAKE_NOW = datetime.datetime(2022, 1, 1, tzinfo=datetime.timezone.utc) config = {"start_date": "2021-01-01T00:00:00.000000+0000", "start_date_ymd": "2021-01-01"} -end_date_now = InterpolatedString( - "{{ today_utc() }}", -) +end_date_now = InterpolatedString(string="{{ today_utc() }}", options={}) cursor_field = "created" timezone = datetime.timezone.utc @@ -36,8 +34,8 @@ def mock_datetime_now(monkeypatch): ( "test_1_day", None, - MinMaxDatetime("{{ config['start_date'] }}"), - MinMaxDatetime("2021-01-10T00:00:00.000000+0000"), + MinMaxDatetime(datetime="{{ config['start_date'] }}", options={}), + MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), "1d", cursor_field, None, @@ -58,8 +56,8 @@ def mock_datetime_now(monkeypatch): ( "test_2_day", None, - MinMaxDatetime("{{ config['start_date'] }}"), - MinMaxDatetime("2021-01-10T00:00:00.000000+0000"), + MinMaxDatetime(datetime="{{ config['start_date'] }}", options={}), + MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), "2d", cursor_field, None, @@ -75,8 +73,8 @@ def mock_datetime_now(monkeypatch): ( "test_from_stream_state", {"date": "2021-01-05T00:00:00.000000+0000"}, - 
MinMaxDatetime("{{ stream_state['date'] }}"), - MinMaxDatetime("2021-01-10T00:00:00.000000+0000"), + MinMaxDatetime(datetime="{{ stream_state['date'] }}", options={}), + MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), "1d", cursor_field, None, @@ -93,8 +91,8 @@ def mock_datetime_now(monkeypatch): ( "test_12_day", None, - MinMaxDatetime("{{ config['start_date'] }}"), - MinMaxDatetime("2021-01-10T00:00:00.000000+0000"), + MinMaxDatetime(datetime="{{ config['start_date'] }}", options={}), + MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), "12d", cursor_field, None, @@ -106,8 +104,8 @@ def mock_datetime_now(monkeypatch): ( "test_end_time_greater_than_now", None, - MinMaxDatetime("2021-12-28T00:00:00.000000+0000"), - MinMaxDatetime(f"{(FAKE_NOW + datetime.timedelta(days=1)).strftime(datetime_format)}"), + MinMaxDatetime(datetime="2021-12-28T00:00:00.000000+0000", options={}), + MinMaxDatetime(datetime=f"{(FAKE_NOW + datetime.timedelta(days=1)).strftime(datetime_format)}", options={}), "1d", cursor_field, None, @@ -123,8 +121,8 @@ def mock_datetime_now(monkeypatch): ( "test_start_date_greater_than_end_time", None, - MinMaxDatetime("2021-01-10T00:00:00.000000+0000"), - MinMaxDatetime("2021-01-05T00:00:00.000000+0000"), + MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), + MinMaxDatetime(datetime="2021-01-05T00:00:00.000000+0000", options={}), "1d", cursor_field, None, @@ -136,10 +134,10 @@ def mock_datetime_now(monkeypatch): ( "test_cursor_date_greater_than_start_date", {"date": "2021-01-05T00:00:00.000000+0000"}, - MinMaxDatetime("{{ stream_state['date'] }}"), - MinMaxDatetime("2021-01-10T00:00:00.000000+0000"), + MinMaxDatetime(datetime="{{ stream_state['date'] }}", options={}), + MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), "1d", - InterpolatedString("{{ stream_state['date'] }}"), + InterpolatedString(string="{{ stream_state['date'] }}", options={}), None, datetime_format, [ 
@@ -154,8 +152,8 @@ def mock_datetime_now(monkeypatch): ( "test_cursor_date_greater_than_start_date_multiday_step", {cursor_field: "2021-01-05T00:00:00.000000+0000"}, - MinMaxDatetime("2021-01-03T00:00:00.000000+0000"), - MinMaxDatetime("2021-01-10T00:00:00.000000+0000"), + MinMaxDatetime(datetime="2021-01-03T00:00:00.000000+0000", options={}), + MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), "2d", cursor_field, None, @@ -169,10 +167,10 @@ def mock_datetime_now(monkeypatch): ( "test_start_date_less_than_min_date", {"date": "2021-01-05T00:00:00.000000+0000"}, - MinMaxDatetime("{{ config['start_date'] }}", min_datetime="{{ stream_state['date'] }}"), - MinMaxDatetime("2021-01-10T00:00:00.000000+0000"), + MinMaxDatetime(datetime="{{ config['start_date'] }}", min_datetime="{{ stream_state['date'] }}", options={}), + MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), "1d", - InterpolatedString("{{ stream_state['date'] }}"), + InterpolatedString(string="{{ stream_state['date'] }}", options={}), None, datetime_format, [ @@ -187,8 +185,8 @@ def mock_datetime_now(monkeypatch): ( "test_end_date_greater_than_max_date", {"date": "2021-01-05T00:00:00.000000+0000"}, - MinMaxDatetime("{{ config['start_date'] }}"), - MinMaxDatetime("2021-01-10T00:00:00.000000+0000", max_datetime="{{ stream_state['date'] }}"), + MinMaxDatetime(datetime="{{ config['start_date'] }}", options={}), + MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", max_datetime="{{ stream_state['date'] }}", options={}), "1d", cursor_field, None, @@ -204,8 +202,8 @@ def mock_datetime_now(monkeypatch): ( "test_start_end_min_max_inherits_datetime_format_from_stream_slicer", {"date": "2021-01-05"}, - MinMaxDatetime("{{ config['start_date_ymd'] }}"), - MinMaxDatetime("2021-01-10", max_datetime="{{ stream_state['date'] }}"), + MinMaxDatetime(datetime="{{ config['start_date_ymd'] }}", options={}), + MinMaxDatetime(datetime="2021-01-10", max_datetime="{{ 
stream_state['date'] }}", options={}), "1d", cursor_field, None, @@ -221,8 +219,8 @@ def mock_datetime_now(monkeypatch): ( "test_with_lookback_window_from_start_date", {"date": "2021-01-05"}, - MinMaxDatetime("{{ config['start_date'] }}"), - MinMaxDatetime("2021-01-10", max_datetime="{{ stream_state['date'] }}", datetime_format="%Y-%m-%d"), + MinMaxDatetime(datetime="{{ config['start_date'] }}", options={}), + MinMaxDatetime(datetime="2021-01-10", max_datetime="{{ stream_state['date'] }}", datetime_format="%Y-%m-%d", options={}), "1d", cursor_field, "3d", @@ -241,8 +239,8 @@ def mock_datetime_now(monkeypatch): ( "test_with_lookback_window_defaults_to_0d", {"date": "2021-01-05"}, - MinMaxDatetime("{{ config['start_date'] }}"), - MinMaxDatetime("2021-01-10", max_datetime="{{ stream_state['date'] }}", datetime_format="%Y-%m-%d"), + MinMaxDatetime(datetime="{{ config['start_date'] }}", options={}), + MinMaxDatetime(datetime="2021-01-10", max_datetime="{{ stream_state['date'] }}", datetime_format="%Y-%m-%d", options={}), "1d", cursor_field, "{{ config['does_not_exist'] }}", @@ -258,8 +256,8 @@ def mock_datetime_now(monkeypatch): ( "test_start_is_after_stream_state", {cursor_field: "2021-01-05T00:00:00.000000+0000"}, - MinMaxDatetime("2021-01-01T00:00:00.000000+0000"), - MinMaxDatetime("2021-01-10T00:00:00.000000+0000"), + MinMaxDatetime(datetime="2021-01-01T00:00:00.000000+0000", options={}), + MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), "1d", cursor_field, None, @@ -277,7 +275,7 @@ def mock_datetime_now(monkeypatch): def test_stream_slices( mock_datetime_now, test_name, stream_state, start, end, step, cursor_field, lookback_window, datetime_format, expected_slices ): - lookback_window = InterpolatedString(lookback_window) if lookback_window else None + lookback_window = InterpolatedString(string=lookback_window, options={}) if lookback_window else None slicer = DatetimeStreamSlicer( start_datetime=start, end_datetime=end, @@ -286,6 +284,7 @@ 
def test_stream_slices( datetime_format=datetime_format, lookback_window=lookback_window, config=config, + options={}, ) stream_slices = slicer.stream_slices(SyncMode.incremental, stream_state) @@ -335,13 +334,14 @@ def test_stream_slices( ) def test_update_cursor(test_name, previous_cursor, stream_slice, last_record, expected_state): slicer = DatetimeStreamSlicer( - start_datetime=MinMaxDatetime("2021-01-01T00:00:00.000000+0000"), - end_datetime=MinMaxDatetime("2021-01-10T00:00:00.000000+0000"), + start_datetime=MinMaxDatetime(datetime="2021-01-01T00:00:00.000000+0000", options={}), + end_datetime=MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), step="1d", - cursor_field=InterpolatedString(cursor_field), + cursor_field=InterpolatedString(string=cursor_field, options={}), datetime_format=datetime_format, - lookback_window=InterpolatedString("0d"), + lookback_window=InterpolatedString(string="0d", options={}), config=config, + options={}, ) slicer._cursor = previous_cursor slicer.update_cursor(stream_slice, last_record) @@ -402,45 +402,47 @@ def test_update_cursor(test_name, previous_cursor, stream_slice, last_record, ex ) def test_request_option(test_name, inject_into, field_name, expected_req_params, expected_headers, expected_body_json, expected_body_data): if inject_into == RequestOptionType.path: - start_request_option = RequestOption(inject_into) - end_request_option = RequestOption(inject_into) + start_request_option = RequestOption(inject_into=inject_into, options={}) + end_request_option = RequestOption(inject_into=inject_into, options={}) try: DatetimeStreamSlicer( - start_datetime=MinMaxDatetime("2021-01-01T00:00:00.000000+0000"), - end_datetime=MinMaxDatetime("2021-01-10T00:00:00.000000+0000"), + start_datetime=MinMaxDatetime(datetime="2021-01-01T00:00:00.000000+0000", options={}), + end_datetime=MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), step="1d", - cursor_field=InterpolatedString(cursor_field), + 
cursor_field=InterpolatedString(string=cursor_field, options={}), datetime_format=datetime_format, - lookback_window=InterpolatedString("0d"), + lookback_window=InterpolatedString(string="0d", options={}), start_time_option=start_request_option, end_time_option=end_request_option, config=config, + options={}, ) assert False except ValueError: return else: - start_request_option = RequestOption(inject_into, field_name) if inject_into else None - end_request_option = RequestOption(inject_into, "endtime") if inject_into else None + start_request_option = RequestOption(inject_into=inject_into, options={}, field_name=field_name) if inject_into else None + end_request_option = RequestOption(inject_into=inject_into, options={}, field_name="endtime") if inject_into else None slicer = DatetimeStreamSlicer( - start_datetime=MinMaxDatetime("2021-01-01T00:00:00.000000+0000"), - end_datetime=MinMaxDatetime("2021-01-10T00:00:00.000000+0000"), + start_datetime=MinMaxDatetime(datetime="2021-01-01T00:00:00.000000+0000", options={}), + end_datetime=MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), step="1d", - cursor_field=InterpolatedString(cursor_field), + cursor_field=InterpolatedString(string=cursor_field, options={}), datetime_format=datetime_format, - lookback_window=InterpolatedString("0d"), + lookback_window=InterpolatedString(string="0d", options={}), start_time_option=start_request_option, end_time_option=end_request_option, config=config, + options={}, ) stream_slice = {"start_time": "2021-01-01T00:00:00.000000+0000", "end_time": "2021-01-04T00:00:00.000000+0000"} slicer.update_cursor(stream_slice) - assert expected_req_params == slicer.request_params(stream_slice=stream_slice) - assert expected_headers == slicer.request_headers(stream_slice=stream_slice) - assert expected_body_json == slicer.request_body_json(stream_slice=stream_slice) - assert expected_body_data == slicer.request_body_data(stream_slice=stream_slice) + assert expected_req_params == 
slicer.get_request_params(stream_slice=stream_slice) + assert expected_headers == slicer.get_request_headers(stream_slice=stream_slice) + assert expected_body_json == slicer.get_request_body_json(stream_slice=stream_slice) + assert expected_body_data == slicer.get_request_body_data(stream_slice=stream_slice) if __name__ == "__main__": diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_list_slicer.py b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_list_stream_slicer.py similarity index 61% rename from airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_list_slicer.py rename to airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_list_stream_slicer.py index ccb8ef40803c6..1245a7c14ba00 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_list_slicer.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_list_stream_slicer.py @@ -9,6 +9,7 @@ slice_values = ["customer", "store", "subscription"] cursor_field = "owner_resource" +options = {"cursor_field": "owner_resource"} @pytest.mark.parametrize( @@ -26,10 +27,16 @@ "owner_resource", [{"owner_resource": "customer"}, {"owner_resource": "store"}, {"owner_resource": "subscription"}], ), + ( + "test_using_cursor_from_options", + '["customer", "store", "subscription"]', + "{{ options['cursor_field'] }}", + [{"owner_resource": "customer"}, {"owner_resource": "store"}, {"owner_resource": "subscription"}], + ), ], ) -def test_list_slicer(test_name, slice_values, cursor_field, expected_slices): - slicer = ListStreamSlicer(slice_values, cursor_field, config={}) +def test_list_stream_slicer(test_name, slice_values, cursor_field, expected_slices): + slicer = ListStreamSlicer(slice_values=slice_values, cursor_field=cursor_field, config={}, options=options) slices = [s for s in slicer.stream_slices(SyncMode.incremental, stream_state=None)] assert slices == expected_slices @@ -43,7 +50,7 @@ 
def test_list_slicer(test_name, slice_values, cursor_field, expected_slices): ], ) def test_update_cursor(test_name, stream_slice, last_record, expected_state): - slicer = ListStreamSlicer(slice_values, cursor_field, config={}) + slicer = ListStreamSlicer(slice_values=slice_values, cursor_field=cursor_field, config={}, options={}) slicer.update_cursor(stream_slice, last_record) updated_state = slicer.get_stream_state() assert expected_state == updated_state @@ -54,16 +61,23 @@ def test_update_cursor(test_name, stream_slice, last_record, expected_state): [ ( "test_inject_into_req_param", - RequestOption(RequestOptionType.request_parameter, "owner_resource"), + RequestOption(inject_into=RequestOptionType.request_parameter, options={}, field_name="owner_resource"), {"owner_resource": "customer"}, {}, {}, {}, ), - ("test_pass_by_header", RequestOption(RequestOptionType.header, "owner_resource"), {}, {"owner_resource": "customer"}, {}, {}), + ( + "test_pass_by_header", + RequestOption(inject_into=RequestOptionType.header, options={}, field_name="owner_resource"), + {}, + {"owner_resource": "customer"}, + {}, + {}, + ), ( "test_inject_into_body_json", - RequestOption(RequestOptionType.body_json, "owner_resource"), + RequestOption(inject_into=RequestOptionType.body_json, options={}, field_name="owner_resource"), {}, {}, {"owner_resource": "customer"}, @@ -71,7 +85,7 @@ def test_update_cursor(test_name, stream_slice, last_record, expected_state): ), ( "test_inject_into_body_data", - RequestOption(RequestOptionType.body_data, "owner_resource"), + RequestOption(inject_into=RequestOptionType.body_data, options={}, field_name="owner_resource"), {}, {}, {}, @@ -79,7 +93,7 @@ def test_update_cursor(test_name, stream_slice, last_record, expected_state): ), ( "test_inject_into_path", - RequestOption(RequestOptionType.path), + RequestOption(RequestOptionType.path, {}), {}, {}, {}, @@ -90,15 +104,15 @@ def test_update_cursor(test_name, stream_slice, last_record, expected_state): def 
test_request_option(test_name, request_option, expected_req_params, expected_headers, expected_body_json, expected_body_data): if request_option.inject_into == RequestOptionType.path: try: - ListStreamSlicer(slice_values, cursor_field, {}, request_option) + ListStreamSlicer(slice_values=slice_values, cursor_field=cursor_field, config={}, request_option=request_option, options={}) assert False except ValueError: return - slicer = ListStreamSlicer(slice_values, cursor_field, {}, request_option) + slicer = ListStreamSlicer(slice_values=slice_values, cursor_field=cursor_field, config={}, request_option=request_option, options={}) stream_slice = {cursor_field: "customer"} slicer.update_cursor(stream_slice) - assert expected_req_params == slicer.request_params(stream_slice) - assert expected_headers == slicer.request_headers() - assert expected_body_json == slicer.request_body_json() - assert expected_body_data == slicer.request_body_data() + assert expected_req_params == slicer.get_request_params(stream_slice) + assert expected_headers == slicer.get_request_headers() + assert expected_body_json == slicer.get_request_body_json() + assert expected_body_data == slicer.get_request_body_data() diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_substream_slicer.py b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_substream_slicer.py index d3e15a5c9fba9..d4c8d5ad1f1f4 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_substream_slicer.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_substream_slicer.py @@ -59,17 +59,35 @@ def read_records( ("test_no_parents", [], None), ( "test_single_parent_slices_no_records", - [ParentStreamConfig(MockStream([{}], [], "first_stream"), "id", "first_stream_id")], + [ + ParentStreamConfig( + stream=MockStream([{}], [], "first_stream"), parent_key="id", stream_slice_field="first_stream_id", options={} + ) + ], [{"first_stream_id": 
None, "parent_slice": None}], ), ( "test_single_parent_slices_with_records", - [ParentStreamConfig(MockStream([{}], parent_records, "first_stream"), "id", "first_stream_id")], + [ + ParentStreamConfig( + stream=MockStream([{}], parent_records, "first_stream"), + parent_key="id", + stream_slice_field="first_stream_id", + options={}, + ) + ], [{"first_stream_id": 1, "parent_slice": None}, {"first_stream_id": 2, "parent_slice": None}], ), ( "test_with_parent_slices_and_records", - [ParentStreamConfig(MockStream(parent_slices, all_parent_data, "first_stream"), "id", "first_stream_id")], + [ + ParentStreamConfig( + stream=MockStream(parent_slices, all_parent_data, "first_stream"), + parent_key="id", + stream_slice_field="first_stream_id", + options={}, + ) + ], [ {"parent_slice": "first", "first_stream_id": 0}, {"parent_slice": "first", "first_stream_id": 1}, @@ -81,9 +99,17 @@ def read_records( "test_multiple_parent_streams", [ ParentStreamConfig( - MockStream(parent_slices, data_first_parent_slice + data_second_parent_slice, "first_stream"), "id", "first_stream_id" + stream=MockStream(parent_slices, data_first_parent_slice + data_second_parent_slice, "first_stream"), + parent_key="id", + stream_slice_field="first_stream_id", + options={}, + ), + ParentStreamConfig( + stream=MockStream(second_parent_stream_slice, more_records, "second_stream"), + parent_key="id", + stream_slice_field="second_stream_id", + options={}, ), - ParentStreamConfig(MockStream(second_parent_stream_slice, more_records, "second_stream"), "id", "second_stream_id"), ], [ {"parent_slice": "first", "first_stream_id": 0}, @@ -99,11 +125,11 @@ def read_records( def test_substream_slicer(test_name, parent_stream_configs, expected_slices): if expected_slices is None: try: - SubstreamSlicer(parent_stream_configs) + SubstreamSlicer(parent_stream_configs=parent_stream_configs, options={}) assert False except ValueError: return - slicer = SubstreamSlicer(parent_stream_configs) + slicer = 
SubstreamSlicer(parent_stream_configs=parent_stream_configs, options={}) slices = [s for s in slicer.stream_slices(SyncMode.incremental, stream_state=None)] assert slices == expected_slices @@ -124,12 +150,20 @@ def test_substream_slicer(test_name, parent_stream_configs, expected_slices): def test_update_cursor(test_name, stream_slice, expected_state): parent_stream_name_to_config = [ ParentStreamConfig( - MockStream(parent_slices, data_first_parent_slice + data_second_parent_slice, "first_stream"), "id", "first_stream_id" + stream=MockStream(parent_slices, data_first_parent_slice + data_second_parent_slice, "first_stream"), + parent_key="id", + stream_slice_field="first_stream_id", + options={}, + ), + ParentStreamConfig( + stream=MockStream(second_parent_stream_slice, more_records, "second_stream"), + parent_key="id", + stream_slice_field="second_stream_id", + options={}, ), - ParentStreamConfig(MockStream(second_parent_stream_slice, more_records, "second_stream"), "id", "second_stream_id"), ] - slicer = SubstreamSlicer(parent_stream_name_to_config) + slicer = SubstreamSlicer(parent_stream_configs=parent_stream_name_to_config, options={}) slicer.update_cursor(stream_slice, None) updated_state = slicer.get_stream_state() assert expected_state == updated_state @@ -141,8 +175,8 @@ def test_update_cursor(test_name, stream_slice, expected_state): ( "test_request_option_in_request_param", [ - RequestOption(RequestOptionType.request_parameter, "first_stream"), - RequestOption(RequestOptionType.request_parameter, "second_stream"), + RequestOption(inject_into=RequestOptionType.request_parameter, options={}, field_name="first_stream"), + RequestOption(inject_into=RequestOptionType.request_parameter, options={}, field_name="second_stream"), ], {"first_stream_id": "1234", "second_stream_id": "4567"}, {}, @@ -152,8 +186,8 @@ def test_update_cursor(test_name, stream_slice, expected_state): ( "test_request_option_in_header", [ - RequestOption(RequestOptionType.header, 
"first_stream"), - RequestOption(RequestOptionType.header, "second_stream"), + RequestOption(inject_into=RequestOptionType.header, options={}, field_name="first_stream"), + RequestOption(inject_into=RequestOptionType.header, options={}, field_name="second_stream"), ], {}, {"first_stream_id": "1234", "second_stream_id": "4567"}, @@ -163,8 +197,8 @@ def test_update_cursor(test_name, stream_slice, expected_state): ( "test_request_option_in_param_and_header", [ - RequestOption(RequestOptionType.request_parameter, "first_stream"), - RequestOption(RequestOptionType.header, "second_stream"), + RequestOption(inject_into=RequestOptionType.request_parameter, options={}, field_name="first_stream"), + RequestOption(inject_into=RequestOptionType.header, options={}, field_name="second_stream"), ], {"first_stream_id": "1234"}, {"second_stream_id": "4567"}, @@ -174,8 +208,8 @@ def test_update_cursor(test_name, stream_slice, expected_state): ( "test_request_option_in_body_json", [ - RequestOption(RequestOptionType.body_json, "first_stream"), - RequestOption(RequestOptionType.body_json, "second_stream"), + RequestOption(inject_into=RequestOptionType.body_json, options={}, field_name="first_stream"), + RequestOption(inject_into=RequestOptionType.body_json, options={}, field_name="second_stream"), ], {}, {}, @@ -185,8 +219,8 @@ def test_update_cursor(test_name, stream_slice, expected_state): ( "test_request_option_in_body_data", [ - RequestOption(RequestOptionType.body_data, "first_stream"), - RequestOption(RequestOptionType.body_data, "second_stream"), + RequestOption(inject_into=RequestOptionType.body_data, options={}, field_name="first_stream"), + RequestOption(inject_into=RequestOptionType.body_data, options={}, field_name="second_stream"), ], {}, {}, @@ -204,24 +238,27 @@ def test_request_option( expected_body_data, ): slicer = SubstreamSlicer( - [ + parent_stream_configs=[ ParentStreamConfig( - MockStream(parent_slices, data_first_parent_slice + data_second_parent_slice, 
"first_stream"), - "id", - "first_stream_id", - parent_stream_request_options[0], + stream=MockStream(parent_slices, data_first_parent_slice + data_second_parent_slice, "first_stream"), + parent_key="id", + stream_slice_field="first_stream_id", + options={}, + request_option=parent_stream_request_options[0], ), ParentStreamConfig( - MockStream(second_parent_stream_slice, more_records, "second_stream"), - "id", - "second_stream_id", - parent_stream_request_options[1], + stream=MockStream(second_parent_stream_slice, more_records, "second_stream"), + parent_key="id", + stream_slice_field="second_stream_id", + options={}, + request_option=parent_stream_request_options[1], ), ], + options={}, ) slicer.update_cursor({"first_stream_id": "1234", "second_stream_id": "4567"}, None) - assert expected_req_params == slicer.request_params() - assert expected_headers == slicer.request_headers() - assert expected_body_json == slicer.request_body_json() - assert expected_body_data == slicer.request_body_data() + assert expected_req_params == slicer.get_request_params() + assert expected_headers == slicer.get_request_headers() + assert expected_body_json == slicer.get_request_body_json() + assert expected_body_data == slicer.get_request_body_data() diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_create_partial.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_create_partial.py index cb239d0eca17a..3ba79ab81e7dd 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_create_partial.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_create_partial.py @@ -7,9 +7,10 @@ class AClass: - def __init__(self, parameter, another_param): + def __init__(self, parameter, another_param, options): self.parameter = parameter self.another_param = another_param + self.options = options class OuterClass: @@ -42,12 +43,20 @@ def test_string_interpolation(): s = "{{ next_page_token['next_page_url'] }}" partial = create(InterpolatedString, string=s) 
interpolated_string = partial() - assert interpolated_string._string == s + assert interpolated_string.string == s def test_string_interpolation_through_kwargs(): s = "{{ options['name'] }}" options = {"name": "airbyte"} - partial = create(InterpolatedString, string=s, options=options) + partial = create(InterpolatedString, string=s, **options) + interpolated_string = partial() + assert interpolated_string.eval({}) == "airbyte" + + +def test_string_interpolation_through_options_keyword(): + s = "{{ options['name'] }}" + options = {"$options": {"name": "airbyte"}} + partial = create(InterpolatedString, string=s, **options) interpolated_string = partial() assert interpolated_string.eval({}) == "airbyte" diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_declarative_stream.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_declarative_stream.py index 55295e7ca3772..1b6b849062713 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_declarative_stream.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_declarative_stream.py @@ -38,12 +38,13 @@ def test_declarative_stream(): stream = DeclarativeStream( name=name, primary_key=primary_key, - cursor_field=cursor_field, + stream_cursor_field=cursor_field, schema_loader=schema_loader, retriever=retriever, config=config, transformations=transformations, checkpoint_interval=checkpoint_interval, + options={}, ) assert stream.name == name diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py index 20a2cf248812f..190c448460475 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py @@ -11,6 +11,7 @@ from airbyte_cdk.sources.declarative.extractors.jello import JelloExtractor from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter from 
airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector +from airbyte_cdk.sources.declarative.interpolation import InterpolatedString from airbyte_cdk.sources.declarative.parsers.factory import DeclarativeComponentFactory from airbyte_cdk.sources.declarative.parsers.yaml_parser import YamlParser from airbyte_cdk.sources.declarative.requesters.error_handlers.composite_error_handler import CompositeErrorHandler @@ -44,6 +45,8 @@ def test_factory(): offset: "{{ next_page_token['offset'] }}" limit: "*ref(limit)" request_options: + $options: + here: "iam" class_name: airbyte_cdk.sources.declarative.requesters.request_options.interpolated_request_options_provider.InterpolatedRequestOptionsProvider request_parameters: "*ref(offset_request_parameters)" request_body_json: @@ -51,11 +54,12 @@ def test_factory(): """ config = parser.parse(content) request_options_provider = factory.create_component(config["request_options"], input_config)() + assert type(request_options_provider) == InterpolatedRequestOptionsProvider assert request_options_provider._parameter_interpolator._config == input_config - assert request_options_provider._parameter_interpolator._interpolator._mapping["offset"] == "{{ next_page_token['offset'] }}" + assert request_options_provider._parameter_interpolator._interpolator.mapping["offset"] == "{{ next_page_token['offset'] }}" assert request_options_provider._body_json_interpolator._config == input_config - assert request_options_provider._body_json_interpolator._interpolator._mapping["body_offset"] == "{{ next_page_token['offset'] }}" + assert request_options_provider._body_json_interpolator._interpolator.mapping["body_offset"] == "{{ next_page_token['offset'] }}" def test_interpolate_config(): @@ -72,12 +76,13 @@ def test_interpolate_config(): """ config = parser.parse(content) authenticator = factory.create_component(config["authenticator"], input_config)() - assert authenticator._client_id.eval(input_config) == "some_client_id" 
- assert authenticator._client_secret._string == "some_client_secret" + assert authenticator.client_id.eval(input_config) == "some_client_id" + assert authenticator.client_secret.string == "some_client_secret" - assert authenticator._token_refresh_endpoint.eval(input_config) == "https://api.sendgrid.com/v3/auth" - assert authenticator._refresh_token.eval(input_config) == "verysecrettoken" - assert authenticator._refresh_request_body._mapping == {"body_field": "yoyoyo", "interpolated_body_field": "{{ config['apikey'] }}"} + assert authenticator.token_refresh_endpoint.eval(input_config) == "https://api.sendgrid.com/v3/auth" + assert authenticator.refresh_token.eval(input_config) == "verysecrettoken" + assert authenticator._refresh_request_body.mapping == {"body_field": "yoyoyo", "interpolated_body_field": "{{ config['apikey'] }}"} + assert authenticator.get_refresh_request_body() == {"body_field": "yoyoyo", "interpolated_body_field": "verysecrettoken"} def test_list_based_stream_slicer_with_values_refd(): @@ -90,7 +95,7 @@ def test_list_based_stream_slicer_with_values_refd(): """ config = parser.parse(content) stream_slicer = factory.create_component(config["stream_slicer"], input_config)() - assert ["airbyte", "airbyte-cloud"] == stream_slicer._slice_values + assert ["airbyte", "airbyte-cloud"] == stream_slicer.slice_values def test_list_based_stream_slicer_with_values_defined_in_config(): @@ -105,17 +110,19 @@ def test_list_based_stream_slicer_with_values_defined_in_config(): """ config = parser.parse(content) stream_slicer = factory.create_component(config["stream_slicer"], input_config)() - assert ["airbyte", "airbyte-cloud"] == stream_slicer._slice_values - assert stream_slicer._request_option._option_type == RequestOptionType.header - assert stream_slicer._request_option._field_name == "repository" + assert ["airbyte", "airbyte-cloud"] == stream_slicer.slice_values + assert stream_slicer.request_option.inject_into == RequestOptionType.header + assert 
stream_slicer.request_option.field_name == "repository" def test_create_substream_slicer(): content = """ schema_loader: - file_path: "./source_sendgrid/schemas/{{name}}.yaml" + file_path: "./source_sendgrid/schemas/{{ options['stream_name'] }}.yaml" + name: "{{ options['stream_name'] }}" retriever: requester: + name: "{{ options['stream_name'] }}" path: "/v3" record_selector: extractor: @@ -123,22 +130,22 @@ def test_create_substream_slicer(): stream_A: type: DeclarativeStream $options: - name: "A" - primary_key: "id" + stream_name: "A" + stream_primary_key: "id" retriever: "*ref(retriever)" url_base: "https://airbyte.io" schema_loader: "*ref(schema_loader)" stream_B: type: DeclarativeStream $options: - name: "B" - primary_key: "id" + stream_name: "B" + stream_primary_key: "id" retriever: "*ref(retriever)" url_base: "https://airbyte.io" schema_loader: "*ref(schema_loader)" stream_slicer: type: SubstreamSlicer - parent_streams_configs: + parent_stream_configs: - stream: "*ref(stream_A)" parent_key: id stream_slice_field: repository_id @@ -151,18 +158,18 @@ def test_create_substream_slicer(): """ config = parser.parse(content) stream_slicer = factory.create_component(config["stream_slicer"], input_config)() - parent_stream_configs = stream_slicer._parent_stream_configs + parent_stream_configs = stream_slicer.parent_stream_configs assert len(parent_stream_configs) == 2 assert isinstance(parent_stream_configs[0].stream, DeclarativeStream) assert isinstance(parent_stream_configs[1].stream, DeclarativeStream) - assert stream_slicer._parent_stream_configs[0].parent_key == "id" - assert stream_slicer._parent_stream_configs[0].stream_slice_field == "repository_id" - assert stream_slicer._parent_stream_configs[0].request_option.inject_into == RequestOptionType.request_parameter - assert stream_slicer._parent_stream_configs[0].request_option._field_name == "repository_id" + assert stream_slicer.parent_stream_configs[0].parent_key == "id" + assert 
stream_slicer.parent_stream_configs[0].stream_slice_field == "repository_id" + assert stream_slicer.parent_stream_configs[0].request_option.inject_into == RequestOptionType.request_parameter + assert stream_slicer.parent_stream_configs[0].request_option.field_name == "repository_id" - assert stream_slicer._parent_stream_configs[1].parent_key == "someid" - assert stream_slicer._parent_stream_configs[1].stream_slice_field == "word_id" - assert stream_slicer._parent_stream_configs[1].request_option is None + assert stream_slicer.parent_stream_configs[1].parent_key == "someid" + assert stream_slicer.parent_stream_configs[1].stream_slice_field == "word_id" + assert stream_slicer.parent_stream_configs[1].request_option is None def test_create_cartesian_stream_slicer(): @@ -185,12 +192,12 @@ def test_create_cartesian_stream_slicer(): """ config = parser.parse(content) stream_slicer = factory.create_component(config["stream_slicer"], input_config)() - underlying_slicers = stream_slicer._stream_slicers + underlying_slicers = stream_slicer.stream_slicers assert len(underlying_slicers) == 2 assert isinstance(underlying_slicers[0], ListStreamSlicer) assert isinstance(underlying_slicers[1], ListStreamSlicer) - assert ["airbyte", "airbyte-cloud"] == underlying_slicers[0]._slice_values - assert ["hello", "world"] == underlying_slicers[1]._slice_values + assert ["airbyte", "airbyte-cloud"] == underlying_slicers[0].slice_values + assert ["hello", "world"] == underlying_slicers[1].slice_values def test_datetime_stream_slicer(): @@ -216,18 +223,18 @@ def test_datetime_stream_slicer(): stream_slicer = factory.create_component(config["stream_slicer"], input_config)() assert type(stream_slicer) == DatetimeStreamSlicer assert stream_slicer._timezone == datetime.timezone.utc - assert type(stream_slicer._start_datetime) == MinMaxDatetime - assert type(stream_slicer._end_datetime) == MinMaxDatetime - assert stream_slicer._start_datetime._datetime_format == "%Y-%m-%dT%H:%M:%S.%f%z" - assert 
stream_slicer._start_datetime._timezone == datetime.timezone.utc - assert stream_slicer._start_datetime._datetime_interpolator._string == "{{ config['start_time'] }}" - assert stream_slicer._start_datetime._min_datetime_interpolator._string == "{{ config['start_time'] + day_delta(2) }}" - assert stream_slicer._end_datetime._datetime_interpolator._string == "{{ config['end_time'] }}" + assert type(stream_slicer.start_datetime) == MinMaxDatetime + assert type(stream_slicer.end_datetime) == MinMaxDatetime + assert stream_slicer.start_datetime._datetime_format == "%Y-%m-%dT%H:%M:%S.%f%z" + assert stream_slicer.start_datetime._timezone == datetime.timezone.utc + assert stream_slicer.start_datetime.datetime.string == "{{ config['start_time'] }}" + assert stream_slicer.start_datetime.min_datetime.string == "{{ config['start_time'] + day_delta(2) }}" + assert stream_slicer.end_datetime.datetime.string == "{{ config['end_time'] }}" assert stream_slicer._step == datetime.timedelta(days=10) - assert stream_slicer._cursor_field._string == "created" - assert stream_slicer._lookback_window._string == "5d" - assert stream_slicer._start_time_option.inject_into == RequestOptionType.request_parameter - assert stream_slicer._start_time_option._field_name == "created[gte]" + assert stream_slicer.cursor_field.string == "created" + assert stream_slicer.lookback_window.string == "5d" + assert stream_slicer.start_time_option.inject_into == RequestOptionType.request_parameter + assert stream_slicer.start_time_option.field_name == "created[gte]" def test_full_config(): @@ -266,7 +273,7 @@ def test_full_config(): http_method: "GET" authenticator: type: BearerAuthenticator - token: "{{ config['apikey'] }}" + api_token: "{{ config['apikey'] }}" request_parameters_provider: "*ref(request_options_provider)" error_handler: type: DefaultErrorHandler @@ -314,29 +321,29 @@ def test_full_config(): assert stream_config["cursor_field"] == [] stream = factory.create_component(stream_config, 
input_config)() - assert isinstance(stream._retriever._record_selector._extractor, JelloExtractor) + assert isinstance(stream.retriever.record_selector.extractor, JelloExtractor) assert type(stream) == DeclarativeStream assert stream.primary_key == "id" assert stream.name == "lists" - assert type(stream._schema_loader) == JsonSchema - assert type(stream._retriever) == SimpleRetriever - assert stream._retriever._requester._method == HttpMethod.GET - assert stream._retriever._requester._authenticator._token.eval(input_config) == "verysecrettoken" - assert type(stream._retriever._record_selector) == RecordSelector - assert type(stream._retriever._record_selector._extractor._decoder) == JsonDecoder - - assert stream._retriever._record_selector._extractor._transform.eval(input_config) == "_.result" - assert type(stream._retriever._record_selector._record_filter) == RecordFilter - assert stream._retriever._record_selector._record_filter._filter_interpolator._condition == "{{ record['id'] > stream_state['id'] }}" - assert stream._schema_loader._get_json_filepath() == "./source_sendgrid/schemas/lists.json" + assert type(stream.schema_loader) == JsonSchema + assert type(stream.retriever) == SimpleRetriever + assert stream.retriever.requester.http_method == HttpMethod.GET + assert stream.retriever.requester.authenticator._token.eval(input_config) == "verysecrettoken" + assert type(stream.retriever.record_selector) == RecordSelector + assert type(stream.retriever.record_selector.extractor.decoder) == JsonDecoder + + assert stream.retriever.record_selector.extractor.transform.eval(input_config) == "_.result" + assert type(stream.retriever.record_selector.record_filter) == RecordFilter + assert stream.retriever.record_selector.record_filter._filter_interpolator.condition == "{{ record['id'] > stream_state['id'] }}" + assert stream.schema_loader._get_json_filepath() == "./source_sendgrid/schemas/lists.json" checker = factory.create_component(config["check"], input_config)() - 
streams_to_check = checker._stream_names + streams_to_check = checker.stream_names assert len(streams_to_check) == 1 assert list(streams_to_check)[0] == "list_stream" - assert stream._retriever._requester._path._default == "marketing/lists" + assert stream.retriever.requester.path.default == "marketing/lists" def test_create_record_selector(): @@ -356,9 +363,9 @@ def test_create_record_selector(): config = parser.parse(content) selector = factory.create_component(config["selector"], input_config)() assert isinstance(selector, RecordSelector) - assert isinstance(selector._extractor, JelloExtractor) - assert selector._extractor._transform.eval(input_config) == "_.result" - assert isinstance(selector._record_filter, RecordFilter) + assert isinstance(selector.extractor, JelloExtractor) + assert selector.extractor.transform.eval(input_config) == "_.result" + assert isinstance(selector.record_filter, RecordFilter) def test_create_requester(): @@ -367,7 +374,7 @@ def test_create_requester(): type: HttpRequester path: "/v3/marketing/lists" $options: - name: lists + name: 'lists' url_base: "https://api.sendgrid.com" authenticator: type: "BasicHttpAuthenticator" @@ -382,16 +389,16 @@ def test_create_requester(): config = parser.parse(content) component = factory.create_component(config["requester"], input_config)() assert isinstance(component, HttpRequester) - assert isinstance(component._error_handler, DefaultErrorHandler) - assert component._path._string == "/v3/marketing/lists" - assert component._url_base._string == "https://api.sendgrid.com" - assert isinstance(component._authenticator, BasicHttpAuthenticator) - assert component._authenticator._username.eval(input_config) == "lists" - assert component._authenticator._password.eval(input_config) == "verysecrettoken" + assert isinstance(component.error_handler, DefaultErrorHandler) + assert component.path.string == "/v3/marketing/lists" + assert component.url_base.string == "https://api.sendgrid.com" + assert 
isinstance(component.authenticator, BasicHttpAuthenticator) + assert component.authenticator._username.eval(input_config) == "lists" + assert component.authenticator._password.eval(input_config) == "verysecrettoken" assert component._method == HttpMethod.GET - assert component._request_options_provider._parameter_interpolator._interpolator._mapping["page_size"] == 10 - assert component._request_options_provider._headers_interpolator._interpolator._mapping["header"] == "header_value" - assert component._name == "lists" + assert component._request_options_provider._parameter_interpolator._interpolator.mapping["page_size"] == 10 + assert component._request_options_provider._headers_interpolator._interpolator.mapping["header"] == "header_value" + assert component.name == "lists" def test_create_composite_error_handler(): @@ -408,11 +415,11 @@ def test_create_composite_error_handler(): """ config = parser.parse(content) component = factory.create_component(config["error_handler"], input_config)() - assert len(component._error_handlers) == 2 - assert isinstance(component._error_handlers[0], DefaultErrorHandler) - assert isinstance(component._error_handlers[0]._response_filters[0], HttpResponseFilter) - assert component._error_handlers[0]._response_filters[0]._predicate._condition == "{{ 'code' in response }}" - assert component._error_handlers[1]._response_filters[0]._http_codes == [403] + assert len(component.error_handlers) == 2 + assert isinstance(component.error_handlers[0], DefaultErrorHandler) + assert isinstance(component.error_handlers[0].response_filters[0], HttpResponseFilter) + assert component.error_handlers[0].response_filters[0].predicate.condition == "{{ 'code' in response }}" + assert component.error_handlers[1].response_filters[0].http_codes == [403] assert isinstance(component, CompositeErrorHandler) @@ -425,7 +432,8 @@ def test_config_with_defaults(): primary_key: id url_base: "https://api.sendgrid.com" schema_loader: - file_path: 
"./source_sendgrid/schemas/{{options.name}}.yaml" + name: "{{ options.stream_name }}" + file_path: "./source_sendgrid/schemas/{{ options.name }}.yaml" retriever: paginator: type: "LimitPaginator" @@ -442,7 +450,7 @@ def test_config_with_defaults(): path: "/v3/marketing/lists" authenticator: type: "BearerAuthenticator" - token: "{{ config.apikey }}" + api_token: "{{ config.apikey }}" request_parameters: page_size: 10 record_selector: @@ -458,17 +466,17 @@ def test_config_with_defaults(): assert type(stream) == DeclarativeStream assert stream.primary_key == "id" assert stream.name == "lists" - assert type(stream._schema_loader) == JsonSchema - assert type(stream._retriever) == SimpleRetriever - assert stream._retriever._requester._method == HttpMethod.GET + assert type(stream.schema_loader) == JsonSchema + assert type(stream.retriever) == SimpleRetriever + assert stream.retriever.requester.http_method == HttpMethod.GET - assert stream._retriever._requester._authenticator._token.eval(input_config) == "verysecrettoken" - assert stream._retriever._record_selector._extractor._transform.eval(input_config) == "_.result" - assert stream._schema_loader._get_json_filepath() == "./source_sendgrid/schemas/lists.yaml" - assert isinstance(stream._retriever._paginator, LimitPaginator) + assert stream.retriever.requester.authenticator._token.eval(input_config) == "verysecrettoken" + assert stream.retriever.record_selector.extractor.transform.eval(input_config) == "_.result" + assert stream.schema_loader._get_json_filepath() == "./source_sendgrid/schemas/lists.yaml" + assert isinstance(stream.retriever.paginator, LimitPaginator) - assert stream._retriever._paginator._url_base._string == "https://api.sendgrid.com" - assert stream._retriever._paginator._page_size == 10 + assert stream.retriever.paginator.url_base.string == "https://api.sendgrid.com" + assert stream.retriever.paginator.page_size == 10 def test_create_limit_paginator(): @@ -491,7 +499,7 @@ def 
test_create_limit_paginator(): paginator_config = config["paginator"] paginator = factory.create_component(paginator_config, input_config)() assert isinstance(paginator, LimitPaginator) - page_token_option = paginator._page_token_option + page_token_option = paginator.page_token_option assert isinstance(page_token_option, RequestOption) assert page_token_option.inject_into == RequestOptionType.path @@ -503,9 +511,11 @@ class TestCreateTransformations: primary_key: id url_base: "https://api.sendgrid.com" schema_loader: - file_path: "./source_sendgrid/schemas/{{name}}.yaml" + name: "{{ options.name }}" + file_path: "./source_sendgrid/schemas/{{ options.name }}.yaml" retriever: requester: + name: "{{ options.name }}" path: "/v3/marketing/lists" request_parameters: page_size: 10 @@ -524,7 +534,7 @@ def test_no_transformations(self): config = parser.parse(content) component = factory.create_component(config["the_stream"], input_config)() assert isinstance(component, DeclarativeStream) - assert [] == component._transformations + assert [] == component.transformations def test_remove_fields(self): content = f""" @@ -541,8 +551,8 @@ def test_remove_fields(self): config = parser.parse(content) component = factory.create_component(config["the_stream"], input_config)() assert isinstance(component, DeclarativeStream) - expected = [RemoveFields(field_pointers=[["path", "to", "field1"], ["path2"]])] - assert expected == component._transformations + expected = [RemoveFields(field_pointers=[["path", "to", "field1"], ["path2"]], options={})] + assert expected == component.transformations def test_add_fields(self): content = f""" @@ -559,5 +569,14 @@ def test_add_fields(self): config = parser.parse(content) component = factory.create_component(config["the_stream"], input_config)() assert isinstance(component, DeclarativeStream) - expected = [AddFields([AddedFieldDefinition(["field1"], "static_value")])] - assert expected == component._transformations + expected = [ + AddFields( + 
fields=[ + AddedFieldDefinition( + path=["field1"], value=InterpolatedString(string="static_value", default="static_value", options={}), options={} + ) + ], + options={}, + ) + ] + assert expected == component.transformations diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_add_fields.py b/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_add_fields.py index 89941d4ba315d..61fb31ba70562 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_add_fields.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_add_fields.py @@ -106,5 +106,5 @@ def test_add_fields( input_record: Mapping[str, Any], field: List[Tuple[FieldPointer, str]], kwargs: Mapping[str, Any], expected: Mapping[str, Any] ): - inputs = [AddedFieldDefinition(v[0], v[1]) for v in field] - assert AddFields(inputs).transform(input_record, **kwargs) == expected + inputs = [AddedFieldDefinition(path=v[0], value=v[1], options={}) for v in field] + assert AddFields(fields=inputs, options={"alas": "i live"}).transform(input_record, **kwargs) == expected diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_remove_fields.py b/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_remove_fields.py index 2040794b26fbf..c1d0358e4cdba 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_remove_fields.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_remove_fields.py @@ -44,5 +44,5 @@ ], ) def test_remove_fields(input_record: Mapping[str, Any], field_pointers: List[FieldPointer], expected: Mapping[str, Any]): - transformation = RemoveFields(field_pointers) + transformation = RemoveFields(field_pointers=field_pointers, options={}) assert transformation.transform(input_record) == expected diff --git a/airbyte-cdk/python/unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py 
b/airbyte-cdk/python/unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py index c70c88ecdbc4c..36386d2143d66 100644 --- a/airbyte-cdk/python/unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py +++ b/airbyte-cdk/python/unit_tests/sources/streams/http/requests_native_auth/test_requests_native_auth.py @@ -128,7 +128,7 @@ def test_refresh_request_body(self): token_expiry_date=pendulum.now().add(days=3), refresh_request_body={"custom_field": "in_outbound_request", "another_field": "exists_in_body", "scopes": ["no_override"]}, ) - body = oauth.get_refresh_request_body() + body = oauth.build_refresh_request_body() expected = { "grant_type": "refresh_token", "client_id": "some_client_id", diff --git a/airbyte-integrations/connector-templates/source-configuration-based/source_{{snakeCase name}}/{{snakeCase name}}.yaml.hbs b/airbyte-integrations/connector-templates/source-configuration-based/source_{{snakeCase name}}/{{snakeCase name}}.yaml.hbs index ad41adab24767..3ca79159242e4 100644 --- a/airbyte-integrations/connector-templates/source-configuration-based/source_{{snakeCase name}}/{{snakeCase name}}.yaml.hbs +++ b/airbyte-integrations/connector-templates/source-configuration-based/source_{{snakeCase name}}/{{snakeCase name}}.yaml.hbs @@ -1,6 +1,6 @@ schema_loader: type: JsonSchema - file_path: "./source_{{snakeCase name}}/schemas/\{{ options.name }}.json" + file_path: "./source_{{snakeCase name}}/schemas/\{{ options['name'] }}.json" selector: type: RecordSelector extractor: @@ -9,13 +9,14 @@ selector: requester: type: HttpRequester name: "\{{ options['name'] }}" - url_base: TODO "your_api_base_url" http_method: "GET" authenticator: - type: TokenAuthenticator - token: "\{{ config['api_key'] }}" + type: ApiKeyAuthenticator + api_token: "\{{ config['api_key'] }}" retriever: type: SimpleRetriever + $options: + url_base: TODO "your_api_base_url" name: "\{{ options['name'] }}" primary_key: "\{{ options['primary_key'] 
}}" record_selector: