Skip to content

Commit

Permalink
🎉 🐛 Source Airtable: cast native Airtable Types to JSONSchema Types (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
bazarnov authored Jan 28, 2023
1 parent 86c0f66 commit f981668
Show file tree
Hide file tree
Showing 7 changed files with 182 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
- name: Airtable
sourceDefinitionId: 14c6e7ea-97ed-4f5e-a7b5-25e9a80b8212
dockerRepository: airbyte/source-airtable
dockerImageTag: 1.0.2
dockerImageTag: 2.0.0
documentationUrl: https://docs.airbyte.com/integrations/sources/airtable
icon: airtable.svg
sourceType: api
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-airtable:1.0.2"
- dockerImage: "airbyte/source-airtable:2.0.0"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/airtable"
connectionSpecification:
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-airtable/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,5 @@ COPY source_airtable ./source_airtable
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=1.0.2
LABEL io.airbyte.version=2.0.0
LABEL io.airbyte.name=airbyte/source-airtable
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,21 @@ acceptance_tests:
discovery:
tests:
- config_path: "secrets/config.json"
# the static `string` schema was replaced by types cast to JSONSchema
backward_compatibility_tests_config:
disable_for_version: "1.0.2"
- config_path: "secrets/config_oauth.json"
# the static `string` schema was replaced by types cast to JSONSchema
backward_compatibility_tests_config:
disable_for_version: "1.0.2"
basic_read:
tests:
- config_path: "secrets/config.json"
expect_records:
path: "integration_tests/expected_records.jsonl"
extra_fields: true
exact_order: true
extra_records: false
extra_records: false
- config_path: "secrets/config_oauth.json"
expect_records:
path: "integration_tests/expected_records.jsonl"
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,115 @@
#


from copy import deepcopy
from typing import Any, Dict

from airbyte_cdk.models import AirbyteStream
from airbyte_cdk.models.airbyte_protocol import DestinationSyncMode, SyncMode


class SchemaTypes:
    """JSONSchema fragments used as building blocks for a stream schema.

    Every fragment lists ``"null"`` in its ``type``, i.e. every value is
    nullable.

    The attributes are plain ``dict`` objects shared by everyone who reads
    them: copy a fragment (for example with ``copy.deepcopy``) before
    modifying it, otherwise the change leaks into every other schema that
    uses the same attribute.
    """

    string: Dict = {"type": ["null", "string"]}

    number: Dict = {"type": ["null", "number"]}

    boolean: Dict = {"type": ["null", "boolean"]}

    # dates are transported as strings; `format` carries the precise meaning
    date: Dict = {"type": ["null", "string"], "format": "date"}

    datetime: Dict = {"type": ["null", "string"], "format": "date-time"}

    array_with_strings: Dict = {"type": ["null", "array"], "items": {"type": ["null", "string"]}}

    # `items` is an empty placeholder: the item schema is expected to be
    # determined per field, based on the field's complexity
    array_with_any: Dict = {"type": ["null", "array"], "items": {}}


# Maps Airtable field types that hold a single, directly representable value
# (or a flat list of strings) to their JSONSchema fragment.
# More info about internal Airtable Data Types:
# https://airtable.com/developers/web/api/field-model
#
# NOTE: the values are the shared `SchemaTypes` dicts themselves, not copies;
# copy a value before modifying it.
SIMPLE_AIRTABLE_TYPES: Dict = {
    "multipleAttachments": SchemaTypes.string,
    "autoNumber": SchemaTypes.number,
    "barcode": SchemaTypes.string,
    "button": SchemaTypes.string,
    "checkbox": SchemaTypes.boolean,
    "singleCollaborator": SchemaTypes.string,
    "count": SchemaTypes.number,
    "createdBy": SchemaTypes.string,
    "createdTime": SchemaTypes.datetime,
    "currency": SchemaTypes.number,
    "email": SchemaTypes.string,
    "date": SchemaTypes.date,
    "dateTime": SchemaTypes.datetime,
    "duration": SchemaTypes.number,
    "lastModifiedBy": SchemaTypes.string,
    "lastModifiedTime": SchemaTypes.datetime,
    "multipleRecordLinks": SchemaTypes.array_with_strings,
    "multilineText": SchemaTypes.string,
    "multipleCollaborators": SchemaTypes.array_with_strings,
    "multipleSelects": SchemaTypes.array_with_strings,
    "number": SchemaTypes.number,
    "percent": SchemaTypes.number,
    "phoneNumber": SchemaTypes.string,
    "rating": SchemaTypes.number,
    "richText": SchemaTypes.string,
    "singleLineText": SchemaTypes.string,
    # `singleSelect` must be listed explicitly: it can be the *result* type of
    # a lookup/rollup/formula field, where a missing entry would resolve to
    # `None` instead of a schema.
    "singleSelect": SchemaTypes.string,
    "externalSyncSource": SchemaTypes.string,
    "url": SchemaTypes.string,
}

# Airtable types that are represented as an `array of Any`, where `Any` is one
# of the simple types: the final array is filled with some simple type.
# Every key maps to the same shared `SchemaTypes.array_with_any` dict.
COMPLEX_AIRTABLE_TYPES: Dict = dict.fromkeys(
    ("formula", "lookup", "multipleLookupValues", "rollup"),
    SchemaTypes.array_with_any,
)


class SchemaHelpers:
@staticmethod
def clean_name(name_str: str) -> str:
    """Turn an Airtable field name into a schema property name.

    Spaces are replaced with underscores, the result is lower-cased, and
    any leading/trailing whitespace that is left (tabs, newlines, ...) is
    stripped.

    Note that the space replacement runs *before* ``strip()``, so leading
    and trailing spaces survive as underscores (``" Name "`` becomes
    ``"_name_"``). The order is kept as is because changing it would rename
    fields in schemas that were already generated.

    :param name_str: the field name as reported by Airtable
    :return: the normalised name
    """
    return name_str.replace(" ", "_").lower().strip()

@staticmethod
def get_json_schema(table: Dict[str, Any]) -> Dict[str, Any]:
    """Build the JSONSchema of a stream from an Airtable table definition.

    The schema always contains the `_airtable_id` and
    `_airtable_created_time` string properties, plus one property per entry
    of ``table["fields"]``, keyed by the cleaned field name.

    Type resolution per field:

    * type listed in ``COMPLEX_AIRTABLE_TYPES``: an array whose ``items``
      schema is taken from the field's ``options.result.type`` (falling back
      to the field's own type, and to ``string`` when that is not a known
      simple type);
    * type listed in ``SIMPLE_AIRTABLE_TYPES``: the mapped simple schema;
    * anything else: ``string``.

    Every fragment placed into the result is a deep copy, so callers may
    modify the returned schema without touching the shared ``SchemaTypes``
    constants.

    :param table: table definition; only its ``fields`` entry is read
    :return: a draft-07 JSONSchema object describing the table's records
    """
    properties: Dict = {
        "_airtable_id": deepcopy(SchemaTypes.string),
        "_airtable_created_time": deepcopy(SchemaTypes.string),
    }

    for field in table.get("fields") or []:
        name: str = SchemaHelpers.clean_name(field.get("name"))
        original_type: str = field.get("type")
        options: Dict = field.get("options") or {}
        # `result` may be absent or explicitly null; treat both as "no result type"
        exec_type = (options.get("result") or {}).get("type")
        field_type: str = exec_type or original_type

        # choose the JsonSchema Type for known Airtable Types
        if original_type in COMPLEX_AIRTABLE_TYPES:
            complex_type = deepcopy(COMPLEX_AIRTABLE_TYPES[original_type])
            # process arrays with values
            if complex_type == SchemaTypes.array_with_any:
                # an unknown result type must not leave `items` as None
                complex_type["items"] = deepcopy(SIMPLE_AIRTABLE_TYPES.get(field_type, SchemaTypes.string))
            properties[name] = complex_type
        elif original_type in SIMPLE_AIRTABLE_TYPES:
            # prefer the result type when it is known, otherwise use the
            # field's own (known) type instead of emitting None
            simple_type = SIMPLE_AIRTABLE_TYPES.get(field_type, SIMPLE_AIRTABLE_TYPES[original_type])
            properties[name] = deepcopy(simple_type)
        else:
            # Airtable may add more field types in the future and doesn't
            # consider it a breaking change: fall back to `string`
            properties[name] = deepcopy(SchemaTypes.string)

    return {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "type": "object",
        "additionalProperties": True,
        "properties": properties,
    }

@staticmethod
Expand Down
47 changes: 42 additions & 5 deletions docs/integrations/sources/airtable.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ This source syncs data from the [Airtable API](https://airtable.com/api).
3. On the Set up the source page, enter the name for the Airtable connector and select **Airtable** from the Source type dropdown.
4. Click **Authenticate your Airtable account**.
5. Log in and grant permissions for the target `bases` you need access to; we recommend selecting `All Workspaces and bases`.
5. Log in and Authorize to the Airtable account and click `Set up source`.
6. Log in and authorize access to the Airtable account, then click `Set up source`.


## Supported sync modes
Expand Down Expand Up @@ -54,10 +54,46 @@ The `Enterprise` level accounts are not supported yet.

## Data type map

| Integration Type | Airbyte Type |
| :--------------------- | :----------- |
| `Any` | `string` |

| Integration Type | Airbyte Type | Nullable |
| :--------------------- | :------------------------------------------------------ | -------- |
| `multipleAttachments` | `string` | Yes |
| `autoNumber` | `number` | Yes |
| `barcode` | `string` | Yes |
| `button` | `string` | Yes |
| `checkbox` | `boolean` | Yes |
| `singleCollaborator` | `string` | Yes |
| `count` | `number` | Yes |
| `createdBy` | `string` | Yes |
| `createdTime` | `string`, `format: date-time` | Yes |
| `currency` | `number` | Yes |
| `email` | `string` | Yes |
| `date` | `string`, `format: date` | Yes |
| `dateTime` | `string`, `format: date-time` | Yes |
| `duration` | `number` | Yes |
| `lastModifiedBy` | `string` | Yes |
| `lastModifiedTime` | `string`, `format: date-time` | Yes |
| `multipleRecordLinks` | `array with strings` | Yes |
| `multilineText` | `string` | Yes |
| `multipleCollaborators`| `array with strings` | Yes |
| `multipleSelects` | `array with strings` | Yes |
| `number` | `number` | Yes |
| `percent` | `number` | Yes |
| `phoneNumber` | `string` | Yes |
| `rating` | `number` | Yes |
| `richText` | `string` | Yes |
| `singleLineText` | `string` | Yes |
| `externalSyncSource` | `string` | Yes |
| `url` | `string` | Yes |
| `formula` | `array with any` | Yes |
| `lookup` | `array with any` | Yes |
| `multipleLookupValues` | `array with any` | Yes |
| `rollup` | `array with any` | Yes |

* All fields are `nullable` by default, meaning that a field can be empty.
* `array with any` represents an ordinary array whose items are one of the other Airtable data types, such as:
- string
- number/integer
- nested lists/objects
- etc

### Performance Considerations (Airbyte Open-Source)

Expand All @@ -67,6 +103,7 @@ See information about rate limits [here](https://airtable.com/developers/web/api

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------|:-------------------------------------------------------|
| 2.0.0 | 2023-01-27 | [21962](https://github.com/airbytehq/airbyte/pull/21962) | Added casting of native Airtable data types to JsonSchema types |
| 1.0.2 | 2023-01-25 | [20934](https://github.com/airbytehq/airbyte/pull/20934) | Added `OAuth2.0` authentication support |
| 1.0.1 | 2023-01-10 | [21215](https://github.com/airbytehq/airbyte/pull/21215) | Fix field names |
| 1.0.0 | 2022-12-22 | [20846](https://github.com/airbytehq/airbyte/pull/20846) | Migrated to Metadata API for dynamic schema generation |
Expand Down

0 comments on commit f981668

Please sign in to comment.