Skip to content

Commit

Permalink
[low-code-connectors] Replace JelloExtractor with DpathExtractor (#15514
Browse files Browse the repository at this point in the history
)

* Handle extracting no records from root

* handle missing keys

* record extractor interface

* dpath extractor

* docstring

* handle extract root array

* Update airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/jello.py

Co-authored-by: Sherif A. Nada <snadalive@gmail.com>

* Update airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_selector.py

Co-authored-by: Sherif A. Nada <snadalive@gmail.com>

* update docstring

* respect extractor interface

* edge case handling

* document

* use dpath by default

* delete jello extractor

* bump cdk version

* delete jello dependency

* Update reference docs templates

* update template

Co-authored-by: Sherif A. Nada <snadalive@gmail.com>
  • Loading branch information
girarda and sherifnada authored Aug 11, 2022
1 parent 8c30067 commit 6332fd6
Show file tree
Hide file tree
Showing 21 changed files with 240 additions and 156 deletions.
3 changes: 3 additions & 0 deletions airbyte-cdk/python/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## 0.1.74
- Replace JelloRecordExtractor with DpathRecordExtractor

## 0.1.73
- Bugfix: Fix bug in DatetimeStreamSlicer's parsing method

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#

from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor
from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
from airbyte_cdk.sources.declarative.extractors.jello import JelloExtractor
from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter
from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector

__all__ = ["HttpSelector", "JelloExtractor", "RecordFilter", "RecordSelector"]
__all__ = ["HttpSelector", "DpathExtractor", "RecordFilter", "RecordSelector"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#

from dataclasses import InitVar, dataclass
from typing import Any, List, Mapping, Union

import dpath.util
import requests
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
from airbyte_cdk.sources.declarative.types import Config, Record
from dataclasses_jsonschema import JsonSchemaMixin


@dataclass
class DpathExtractor(RecordExtractor, JsonSchemaMixin):
"""
Record extractor that searches a decoded response over a path defined as an array of fields.
If the field pointer points to an array, that array is returned.
If the field pointer points to an object, that object is returned wrapped as an array.
If the field pointer points to an empty object, an empty array is returned.
If the field pointer points to a non-existing path, an empty array is returned.
Examples of instantiating this transform:
```
extractor:
type: DpathExtractor
field_pointer:
- "root"
- "data"
```
```
extractor:
type: DpathExtractor
field_pointer:
- "root"
- "{{ options['field'] }}"
```
```
extractor:
type: DpathExtractor
field_pointer: []
```
Attributes:
transform (Union[InterpolatedString, str]): Pointer to the field that should be extracted
config (Config): The user-provided configuration as specified by the source's spec
decoder (Decoder): The decoder responsible to transfom the response in a Mapping
"""

field_pointer: List[Union[InterpolatedString, str]]
config: Config
options: InitVar[Mapping[str, Any]]
decoder: Decoder = JsonDecoder(options={})

def __post_init__(self, options: Mapping[str, Any]):
for pointer_index in range(len(self.field_pointer)):
if isinstance(self.field_pointer[pointer_index], str):
self.field_pointer[pointer_index] = InterpolatedString.create(self.field_pointer[pointer_index], options=options)

def extract_records(self, response: requests.Response) -> List[Record]:
response_body = self.decoder.decode(response)
if len(self.field_pointer) == 0:
extracted = response_body
else:
pointer = [pointer.eval(self.config) for pointer in self.field_pointer]
extracted = dpath.util.get(response_body, pointer, default=[])
if isinstance(extracted, list):
return extracted
elif extracted:
return [extracted]
else:
return []

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#

from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List

import requests
from airbyte_cdk.sources.declarative.types import Record


@dataclass
class RecordExtractor(ABC):
"""
Responsible for translating an HTTP response into a list of records by extracting records from the response.
"""

@abstractmethod
def extract_records(
self,
response: requests.Response,
) -> List[Record]:
"""
Selects records from the response
:param response: The response to extract the records from
:return: List of Records extracted from the response
"""
pass
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import requests
from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
from airbyte_cdk.sources.declarative.extractors.jello import JelloExtractor
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter
from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState
from dataclasses_jsonschema import JsonSchemaMixin
Expand All @@ -20,11 +20,11 @@ class RecordSelector(HttpSelector, JsonSchemaMixin):
records based on a heuristic.
Attributes:
extractor (JelloExtractor): The record extractor responsible for extracting records from a response
extractor (RecordExtractor): The record extractor responsible for extracting records from a response
record_filter (RecordFilter): The record filter responsible for filtering extracted records
"""

extractor: JelloExtractor
extractor: RecordExtractor
options: InitVar[Mapping[str, Any]]
record_filter: RecordFilter = None

Expand All @@ -39,9 +39,6 @@ def select_records(
next_page_token: Optional[Mapping[str, Any]] = None,
) -> List[Record]:
all_records = self.extractor.extract_records(response)
# Some APIs don't wrap single records in a list
if not isinstance(all_records, list):
all_records = [all_records]
if self.record_filter:
return self.record_filter.filter_records(
all_records, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from airbyte_cdk.sources.declarative.auth.token import ApiKeyAuthenticator, BasicHttpAuthenticator, BearerAuthenticator
from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
from airbyte_cdk.sources.declarative.extractors.jello import JelloExtractor
from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor
from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
Expand Down Expand Up @@ -46,11 +46,11 @@
"DatetimeStreamSlicer": DatetimeStreamSlicer,
"DeclarativeStream": DeclarativeStream,
"DefaultErrorHandler": DefaultErrorHandler,
"DpathExtractor": DpathExtractor,
"ExponentialBackoffStrategy": ExponentialBackoffStrategy,
"HttpRequester": HttpRequester,
"InterpolatedBoolean": InterpolatedBoolean,
"InterpolatedString": InterpolatedString,
"JelloExtractor": JelloExtractor,
"JsonSchema": JsonSchema,
"LimitPaginator": LimitPaginator,
"ListStreamSlicer": ListStreamSlicer,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor
from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
Expand Down Expand Up @@ -50,11 +52,12 @@
InterpolatedString: InterpolatedString,
MinMaxDatetime: MinMaxDatetime,
Paginator: NoPagination,
ParentStreamConfig: ParentStreamConfig,
RecordExtractor: DpathExtractor,
RequestOption: RequestOption,
RequestOptionsProvider: InterpolatedRequestOptionsProvider,
Requester: HttpRequester,
Retriever: SimpleRetriever,
ParentStreamConfig: ParentStreamConfig,
SchemaLoader: JsonSchema,
Stream: DeclarativeStream,
StreamSlicer: SingleSlice,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@
Submodules
----------

airbyte\_cdk.sources.declarative.datetime.datetime\_parser module
-----------------------------------------------------------------

.. automodule:: airbyte_cdk.sources.declarative.datetime.datetime_parser
:members:
:undoc-members:
:show-inheritance:

airbyte\_cdk.sources.declarative.datetime.min\_max\_datetime module
-------------------------------------------------------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@
Submodules
----------

airbyte\_cdk.sources.declarative.extractors.dpath\_extractor module
-------------------------------------------------------------------

.. automodule:: airbyte_cdk.sources.declarative.extractors.dpath_extractor
:members:
:undoc-members:
:show-inheritance:

airbyte\_cdk.sources.declarative.extractors.http\_selector module
-----------------------------------------------------------------

Expand All @@ -10,10 +18,10 @@ airbyte\_cdk.sources.declarative.extractors.http\_selector module
:undoc-members:
:show-inheritance:

airbyte\_cdk.sources.declarative.extractors.jello module
--------------------------------------------------------
airbyte\_cdk.sources.declarative.extractors.record\_extractor module
--------------------------------------------------------------------

.. automodule:: airbyte_cdk.sources.declarative.extractors.jello
.. automodule:: airbyte_cdk.sources.declarative.extractors.record_extractor
:members:
:undoc-members:
:show-inheritance:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,6 @@ airbyte\_cdk.sources.declarative.requesters.paginators.no\_pagination module
:undoc-members:
:show-inheritance:

airbyte\_cdk.sources.declarative.requesters.paginators.pagination\_strategy module
----------------------------------------------------------------------------------

.. automodule:: airbyte_cdk.sources.declarative.requesters.paginators.pagination_strategy
:members:
:undoc-members:
:show-inheritance:

airbyte\_cdk.sources.declarative.requesters.paginators.paginator module
-----------------------------------------------------------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ airbyte\_cdk.sources.declarative.requesters.paginators.strategies.page\_incremen
:undoc-members:
:show-inheritance:

airbyte\_cdk.sources.declarative.requesters.paginators.strategies.pagination\_strategy module
---------------------------------------------------------------------------------------------

.. automodule:: airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy
:members:
:undoc-members:
:show-inheritance:

Module contents
---------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@
Submodules
----------

airbyte\_cdk.sources.declarative.requesters.request\_options.interpolated\_request\_input\_provider module
----------------------------------------------------------------------------------------------------------

.. automodule:: airbyte_cdk.sources.declarative.requesters.request_options.interpolated_request_input_provider
:members:
:undoc-members:
:show-inheritance:

airbyte\_cdk.sources.declarative.requesters.request\_options.interpolated\_request\_options\_provider module
------------------------------------------------------------------------------------------------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,6 @@ airbyte\_cdk.sources.declarative.requesters.http\_requester module
:undoc-members:
:show-inheritance:

airbyte\_cdk.sources.declarative.requesters.interpolated\_request\_input\_provider module
-----------------------------------------------------------------------------------------

.. automodule:: airbyte_cdk.sources.declarative.requesters.interpolated_request_input_provider
:members:
:undoc-members:
:show-inheritance:

airbyte\_cdk.sources.declarative.requesters.request\_option module
------------------------------------------------------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ airbyte\_cdk.sources.declarative.declarative\_stream module
:undoc-members:
:show-inheritance:

airbyte\_cdk.sources.declarative.read\_exception module
-------------------------------------------------------
airbyte\_cdk.sources.declarative.exceptions module
--------------------------------------------------

.. automodule:: airbyte_cdk.sources.declarative.read_exception
.. automodule:: airbyte_cdk.sources.declarative.exceptions
:members:
:undoc-members:
:show-inheritance:
Expand Down
3 changes: 1 addition & 2 deletions airbyte-cdk/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

setup(
name="airbyte-cdk",
version="0.1.73",
version="0.1.74",
description="A framework for writing Airbyte Connectors.",
long_description=README,
long_description_content_type="text/markdown",
Expand Down Expand Up @@ -55,7 +55,6 @@
"vcrpy",
"Deprecated~=1.2",
"Jinja2~=3.1.2",
"jello~=1.5.2",
],
python_requires=">=3.9",
extras_require={
Expand Down
Loading

0 comments on commit 6332fd6

Please sign in to comment.