From c7021e6f302c237d47c60fee76a1f63d743b8447 Mon Sep 17 00:00:00 2001 From: Serhii Chvaliuk Date: Thu, 6 Jan 2022 20:49:55 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Source=20S3:=20work-around=20for?= =?UTF-8?q?=20format.delimiter=20change=20'\\t'=20->=20'\t'=20(#9163)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * work-around for format.delimiter '\\t' -> '\t' Signed-off-by: Sergey Chvalyuk --- .../69589781-7828-43c5-9f63-8925b1c1ccc2.json | 2 +- .../main/resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 5 +++-- .../connectors/source-s3/Dockerfile | 2 +- .../source-s3/integration_tests/spec.json | 2 +- .../connectors/source-s3/source_s3/source.py | 8 +++++++- .../source_files_abstract/formats/csv_spec.py | 2 +- .../source-s3/unit_tests/test_source.py | 16 ++++++++++++++++ docs/integrations/sources/s3.md | 1 + 9 files changed, 32 insertions(+), 8 deletions(-) create mode 100644 airbyte-integrations/connectors/source-s3/unit_tests/test_source.py diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/69589781-7828-43c5-9f63-8925b1c1ccc2.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/69589781-7828-43c5-9f63-8925b1c1ccc2.json index eee0048204e04..13c14ceb20d21 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/69589781-7828-43c5-9f63-8925b1c1ccc2.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/69589781-7828-43c5-9f63-8925b1c1ccc2.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "69589781-7828-43c5-9f63-8925b1c1ccc2", "name": "S3", "dockerRepository": "airbyte/source-s3", - "dockerImageTag": "0.1.7", + "dockerImageTag": "0.1.9", "documentationUrl": "https://docs.airbyte.io/integrations/sources/s3", "icon": "s3.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index dadadf52fc82d..391a3d4d4a413 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -593,7 +593,7 @@ - name: S3 sourceDefinitionId: 69589781-7828-43c5-9f63-8925b1c1ccc2 dockerRepository: airbyte/source-s3 - dockerImageTag: 0.1.8 + dockerImageTag: 0.1.9 documentationUrl: https://docs.airbyte.io/integrations/sources/s3 icon: s3.svg sourceType: file diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 44629eb404bec..1557ceac0235e 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -6047,7 +6047,7 @@ path_in_connector_config: - "credentials" - "client_secret" -- dockerImage: "airbyte/source-s3:0.1.8" +- dockerImage: "airbyte/source-s3:0.1.9" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/s3" changelogUrl: "https://docs.airbyte.io/integrations/sources/s3" @@ -6103,7 +6103,8 @@ delimiter: title: "Delimiter" description: "The character delimiting individual cells in the CSV\ - \ data. This may only be a 1-character string." + \ data. This may only be a 1-character string. For tab-delimited\ + \ data enter '\\t'." default: "," minLength: 1 type: "string" diff --git a/airbyte-integrations/connectors/source-s3/Dockerfile b/airbyte-integrations/connectors/source-s3/Dockerfile index 5d0fb5b89aac3..c61cfe03a17cc 100644 --- a/airbyte-integrations/connectors/source-s3/Dockerfile +++ b/airbyte-integrations/connectors/source-s3/Dockerfile @@ -17,5 +17,5 @@ COPY source_s3 ./source_s3 ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.8 +LABEL io.airbyte.version=0.1.9 LABEL io.airbyte.name=airbyte/source-s3 diff --git a/airbyte-integrations/connectors/source-s3/integration_tests/spec.json b/airbyte-integrations/connectors/source-s3/integration_tests/spec.json index 9dfac7d239095..07195b750e766 100644 --- a/airbyte-integrations/connectors/source-s3/integration_tests/spec.json +++ b/airbyte-integrations/connectors/source-s3/integration_tests/spec.json @@ -46,7 +46,7 @@ }, "delimiter": { "title": "Delimiter", - "description": "The character delimiting individual cells in the CSV data. This may only be a 1-character string.", + "description": "The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\\t'.", "default": ",", "minLength": 1, "type": "string" diff --git a/airbyte-integrations/connectors/source-s3/source_s3/source.py b/airbyte-integrations/connectors/source-s3/source_s3/source.py index 382f5ee756f8d..71d0a19031adb 100644 --- a/airbyte-integrations/connectors/source-s3/source_s3/source.py +++ b/airbyte-integrations/connectors/source-s3/source_s3/source.py @@ -3,7 +3,7 @@ # -from typing import Optional +from typing import Any, Mapping, Optional from pydantic import BaseModel, Field @@ -47,3 +47,9 @@ class SourceS3(SourceFilesAbstract): stream_class = IncrementalFileStreamS3 spec_class = SourceS3Spec documentation_url = "https://docs.airbyte.io/integrations/sources/s3" + + def read_config(self, config_path: str) -> Mapping[str, Any]: + config = super().read_config(config_path) + if config.get("format", {}).get("delimiter") == r"\t": + config["format"]["delimiter"] = "\t" + return config diff --git a/airbyte-integrations/connectors/source-s3/source_s3/source_files_abstract/formats/csv_spec.py b/airbyte-integrations/connectors/source-s3/source_s3/source_files_abstract/formats/csv_spec.py index bce9f15db010e..0fe3faa3e06a4 100644 --- a/airbyte-integrations/connectors/source-s3/source_s3/source_files_abstract/formats/csv_spec.py +++ b/airbyte-integrations/connectors/source-s3/source_s3/source_files_abstract/formats/csv_spec.py @@ -21,7 +21,7 @@ class Config: delimiter: str = Field( default=",", min_length=1, - description="The character delimiting individual cells in the CSV data. This may only be a 1-character string.", + description="The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\\t'.", ) quote_char: str = Field( default='"', description="The character used optionally for quoting CSV values. To disallow quoting, make this field blank." diff --git a/airbyte-integrations/connectors/source-s3/unit_tests/test_source.py b/airbyte-integrations/connectors/source-s3/unit_tests/test_source.py new file mode 100644 index 0000000000000..49c788c5334eb --- /dev/null +++ b/airbyte-integrations/connectors/source-s3/unit_tests/test_source.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +import json + +from source_s3 import SourceS3 + + +def test_transform_backslash_t_to_tab(tmp_path): + config_file = tmp_path / "config.json" + with open(config_file, "w") as fp: + json.dump({"format": {"delimiter": "\\t"}}, fp) + source = SourceS3() + config = source.read_config(config_file) + assert config["format"]["delimiter"] == "\t" diff --git a/docs/integrations/sources/s3.md b/docs/integrations/sources/s3.md index 0eaf1e18b2646..1d245db3b2a8c 100644 --- a/docs/integrations/sources/s3.md +++ b/docs/integrations/sources/s3.md @@ -206,6 +206,7 @@ You can find details on [here](https://arrow.apache.org/docs/python/generated/py | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.9 | 2022-01-06 | [9163](https://github.com/airbytehq/airbyte/pull/9163) | Work-around for web-UI, `backslash - t` converts to `tab` for `format.delimiter` field. | | 0.1.7 | 2021-11-08 | [7499](https://github.com/airbytehq/airbyte/pull/7499) | Remove base-python dependencies | | 0.1.6 | 2021-10-15 | [6615](https://github.com/airbytehq/airbyte/pull/6615) & [7058](https://github.com/airbytehq/airbyte/pull/7058) | Memory and performance optimisation. Advanced options for CSV parsing. | | 0.1.5 | 2021-09-24 | [6398](https://github.com/airbytehq/airbyte/pull/6398) | Support custom non Amazon S3 services |