diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/ce0d828e-1dc4-496c-b122-2da42e637e48.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/ce0d828e-1dc4-496c-b122-2da42e637e48.json new file mode 100644 index 0000000000000..88e582d9c3880 --- /dev/null +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/ce0d828e-1dc4-496c-b122-2da42e637e48.json @@ -0,0 +1,7 @@ +{ + "destinationDefinitionId": "ce0d828e-1dc4-496c-b122-2da42e637e48", + "name": "Clickhouse", + "dockerRepository": "airbyte/destination-clickhouse", + "dockerImageTag": "0.1.0", + "documentationUrl": "https://docs.airbyte.io/integrations/destinations/clickhouse" +} diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index 6cbfba046e5f1..a5397fa5e2c22 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -34,6 +34,11 @@ dockerImageTag: 0.2.0 documentationUrl: https://docs.airbyte.io/integrations/destinations/keen icon: chargify.svg +- name: Clickhouse + destinationDefinitionId: ce0d828e-1dc4-496c-b122-2da42e637e48 + dockerRepository: airbyte/destination-clickhouse + dockerImageTag: 0.1.0 + documentationUrl: https://docs.airbyte.io/integrations/destinations/clickhouse - name: DynamoDB destinationDefinitionId: 8ccd8909-4e99-4141-b48d-4984b70b2d89 dockerRepository: airbyte/destination-dynamodb diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 86739d566e2fe..c160408c13293 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -525,6 +525,165 @@ supported_destination_sync_modes: - "overwrite" - "append" +- dockerImage: "airbyte/destination-clickhouse:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/destinations/clickhouse" + connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "ClickHouse Destination Spec" + type: "object" + required: + - "host" + - "port" + - "database" + - "username" + additionalProperties: true + properties: + host: + title: "Host" + description: "Hostname of the database." + type: "string" + order: 0 + port: + title: "Port" + description: "JDBC port (not the native port) of the database." + type: "integer" + minimum: 0 + maximum: 65536 + default: 8123 + examples: + - "8123" + order: 1 + database: + title: "DB Name" + description: "Name of the database." + type: "string" + order: 2 + username: + title: "User" + description: "Username to use to access the database." + type: "string" + order: 3 + password: + title: "Password" + description: "Password associated with the username." + type: "string" + airbyte_secret: true + order: 4 + ssl: + title: "SSL Connection" + description: "Encrypt data using SSL." + type: "boolean" + default: false + order: 5 + tunnel_method: + type: "object" + title: "SSH Tunnel Method" + description: "Whether to initiate an SSH tunnel before connecting to the\ + \ database, and if so, which kind of authentication to use." 
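For orientation, a user config satisfying the required fields of this spec (host, port, database, username) could look like the following sketch; all values are hypothetical, and note that the port is the JDBC/HTTP port (8123 by default), not ClickHouse's native port 9000. The tunnel_method object takes one of the three variants listed just below.

    # Hypothetical config instance for the ClickHouse destination spec above.
    example_config = {
        "host": "localhost",
        "port": 8123,                  # JDBC/HTTP port, not the native 9000
        "database": "default",
        "username": "default",
        "password": "",                # optional, marked airbyte_secret
        "ssl": False,                  # optional, defaults to false
        "tunnel_method": {"tunnel_method": "NO_TUNNEL"},  # see variants below
    }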
+ oneOf: + - title: "No Tunnel" + required: + - "tunnel_method" + properties: + tunnel_method: + description: "No ssh tunnel needed to connect to database" + type: "string" + const: "NO_TUNNEL" + order: 0 + - title: "SSH Key Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "ssh_key" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and ssh key" + type: "string" + const: "SSH_KEY_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." + type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host." + type: "string" + order: 3 + ssh_key: + title: "SSH Private Key" + description: "OS-level user account ssh key credentials in RSA PEM\ + \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )" + type: "string" + airbyte_secret: true + multiline: true + order: 4 + - title: "Password Authentication" + required: + - "tunnel_method" + - "tunnel_host" + - "tunnel_port" + - "tunnel_user" + - "tunnel_user_password" + properties: + tunnel_method: + description: "Connect through a jump server tunnel host using username\ + \ and password authentication" + type: "string" + const: "SSH_PASSWORD_AUTH" + order: 0 + tunnel_host: + title: "SSH Tunnel Jump Server Host" + description: "Hostname of the jump server host that allows inbound\ + \ ssh tunnel." + type: "string" + order: 1 + tunnel_port: + title: "SSH Connection Port" + description: "Port on the proxy/jump server that accepts inbound ssh\ + \ connections." 
+ type: "integer" + minimum: 0 + maximum: 65536 + default: 22 + examples: + - "22" + order: 2 + tunnel_user: + title: "SSH Login Username" + description: "OS-level username for logging into the jump server host" + type: "string" + order: 3 + tunnel_user_password: + title: "Password" + description: "OS-level password for logging into the jump server host" + type: "string" + airbyte_secret: true + order: 4 + supportsIncremental: true + supportsNormalization: true + supportsDBT: false + supported_destination_sync_modes: + - "overwrite" + - "append" + - "append_dedup" - dockerImage: "airbyte/destination-dynamodb:0.1.0" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/dynamodb" diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java b/airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java index 13ce25cf4ee12..e8cde92455baf 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java @@ -75,6 +75,10 @@ public static Database createOracleDatabase(final String username, final String return createDatabase(username, password, jdbcConnectionString, "oracle.jdbc.OracleDriver", SQLDialect.DEFAULT); } + public static Database createClickhouseDatabase(final String username, final String password, final String jdbcConnectionString) { + return createDatabase(username, password, jdbcConnectionString, "ru.yandex.clickhouse.ClickHouseDriver", SQLDialect.DEFAULT); + } + public static Database createMariaDbDatabase(final String username, final String password, final String jdbcConnectionString) { return createDatabase(username, password, jdbcConnectionString, "org.mariadb.jdbc.Driver", SQLDialect.MARIADB); } diff --git a/airbyte-integrations/bases/base-normalization/clickhouse.Dockerfile b/airbyte-integrations/bases/base-normalization/clickhouse.Dockerfile new file mode 100644 index 0000000000000..ba365769c9585 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/clickhouse.Dockerfile @@ -0,0 +1,34 @@ +FROM fishtownanalytics/dbt:0.21.0 +COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte + +# Install SSH Tunneling dependencies +RUN apt-get update && apt-get install -y jq sshpass +WORKDIR /airbyte +COPY entrypoint.sh . +COPY build/sshtunneling.sh . + +WORKDIR /airbyte/normalization_code +COPY normalization ./normalization +COPY setup.py . +COPY dbt-project-template/ ./dbt-template/ + +# Install python dependencies +WORKDIR /airbyte/base_python_structs +RUN pip install . + +WORKDIR /airbyte/normalization_code +RUN pip install . 
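Stepping back to the tunnel_method variants the spec above just finished defining: both authenticated variants describe an ordinary SSH local port forward placed in front of the ClickHouse HTTP port. A minimal sketch of that behavior, using the third-party sshtunnel package with made-up hosts and credentials (an illustration only, not the connector's actual implementation):

    # Hypothetical sketch of SSH_KEY_AUTH / SSH_PASSWORD_AUTH as a local port forward.
    from sshtunnel import SSHTunnelForwarder

    tunnel = SSHTunnelForwarder(
        ("jump.example.com", 22),                  # tunnel_host, tunnel_port
        ssh_username="airbyte",                    # tunnel_user
        ssh_pkey="/path/to/myuser_rsa",            # ssh_key (SSH_KEY_AUTH), or instead:
        # ssh_password="...",                      # tunnel_user_password (SSH_PASSWORD_AUTH)
        remote_bind_address=("clickhouse.internal", 8123),  # database host and JDBC port
    )
    tunnel.start()
    print("connect to ClickHouse via localhost:", tunnel.local_bind_port)
    tunnel.stop()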
+ +WORKDIR /airbyte/normalization_code/dbt-template/ +#RUN pip install dbt-clickhouse +# dbt-clickhouse adapter has some bugs, use our own just for now +# https://github.com/silentsokolov/dbt-clickhouse/issues/20 +RUN pip install git+https://github.com/burmecia/dbt-clickhouse.git +# Download external dbt dependencies +RUN dbt deps + +WORKDIR /airbyte +ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" +ENTRYPOINT ["/airbyte/entrypoint.sh"] + +LABEL io.airbyte.name=airbyte/normalization-clickhouse diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/dbt_project.yml new file mode 100755 index 0000000000000..37eca7fcd73f3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/dbt_project.yml @@ -0,0 +1,64 @@ +# This file is necessary to install dbt-utils with dbt deps +# the content will be overwritten by the transform function + +# Name your package! Package names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'airbyte_utils' +version: '1.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. Profiles contain +# database connection information, and should be configured in the ~/.dbt/profiles.yml file +profile: 'normalize' + +# These configurations specify where dbt should look for different types of files. +# The `source-paths` config, for example, states that source models can be found +# in the "models/" directory. You probably won't need to change these! +source-paths: ["models"] +docs-paths: ["docs"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +data-paths: ["data"] +macro-paths: ["macros"] + +target-path: "../build" # directory which will store compiled SQL files +log-path: "../logs" # directory which will store DBT logs +modules-path: "/tmp/dbt_modules" # directory which will store external DBT dependencies + +clean-targets: # directories to be removed by `dbt clean` + - "build" + - "dbt_modules" + +quoting: + database: true +# Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) +# all schemas should be unquoted + schema: false + identifier: true + +# You can define configurations for models in the `source-paths` directory here. +# Using these configurations, you can enable or disable models, change how they +# are materialized, and more! +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + # schema change test isn't supported in ClickHouse yet + +on_schema_change: "ignore" + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view + +dispatch: + - macro_namespace: dbt_utils + search_order: ['airbyte_utils', 'dbt_utils'] diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/packages.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/packages.yml new file mode 100755 index 0000000000000..8c90040510539 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/packages.yml @@ -0,0 +1,5 @@ +# add dependencies. 
these will get pulled during the `dbt deps` process. + +packages: + - git: "https://github.com/fishtown-analytics/dbt-utils.git" + revision: 0.7.3 diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql index 28b734ee6fe25..080aea5eb104b 100644 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql @@ -32,6 +32,10 @@ VARCHAR(max) {%- endmacro -%} +{% macro clickhouse__type_json() %} + String +{% endmacro %} + {# string ------------------------------------------------- #} @@ -47,6 +51,10 @@ VARCHAR(max) {%- endmacro -%} +{%- macro clickhouse__type_string() -%} + String +{%- endmacro -%} + {# float ------------------------------------------------- #} {% macro mysql__type_float() %} @@ -57,6 +65,10 @@ float {% endmacro %} +{% macro clickhouse__type_float() %} + Float64 +{% endmacro %} + {# int ------------------------------------------------- #} {% macro default__type_int() %} @@ -67,6 +79,11 @@ int {% endmacro %} +{% macro clickhouse__type_int() %} + INT +{% endmacro %} + + {# bigint ------------------------------------------------- #} {% macro mysql__type_bigint() %} signed @@ -76,12 +93,20 @@ numeric {% endmacro %} +{% macro clickhouse__type_bigint() %} + BIGINT +{% endmacro %} + {# numeric ------------------------------------------------- --#} {% macro mysql__type_numeric() %} float {% endmacro %} +{% macro clickhouse__type_numeric() %} + Float64 +{% endmacro %} + {# timestamp ------------------------------------------------- --#} {% macro mysql__type_timestamp() %} @@ -94,6 +119,10 @@ datetime {%- endmacro -%} +{% macro clickhouse__type_timestamp() %} + DateTime64 +{% endmacro %} + {# timestamp with time zone ------------------------------------------------- #} @@ -124,6 +153,10 @@ datetime {%- endmacro -%} +{% macro clickhouse__type_timestamp_with_timezone() %} + DateTime64 +{% endmacro %} + {# date ------------------------------------------------- #} @@ -142,3 +175,7 @@ {%- macro sqlserver__type_date() -%} date {%- endmacro -%} + +{% macro clickhouse__type_date() %} + Date +{% endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/json_operations.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/json_operations.sql index e0d27c4ae3b31..619eaf46cbfdc 100644 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/json_operations.sql +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/json_operations.sql @@ -5,6 +5,7 @@ - Redshift: json_extract_path_text('json_string', 'path_elem' [,'path_elem'[, ...] ] [, null_if_invalid ] ) -> https://docs.aws.amazon.com/redshift/latest/dg/JSON_EXTRACT_PATH_TEXT.html - Postgres: json_extract_path_text(, 'path' [, 'path' [, ...}}) -> https://www.postgresql.org/docs/12/functions-json.html - MySQL: JSON_EXTRACT(json_doc, 'path' [, 'path'] ...) -> https://dev.mysql.com/doc/refman/8.0/en/json-search-functions.html + - ClickHouse: JSONExtractString(json_doc, 'path' [, 'path'] ...) 
-> https://clickhouse.com/docs/en/sql-reference/functions/json-functions/ #} {# format_json_path -------------------------------------------------- #} @@ -66,6 +67,14 @@ {{ "'$.\"" ~ str_list|join(".") ~ "\"'" }} {%- endmacro %} +{% macro clickhouse__format_json_path(json_path_list) -%} + {%- set str_list = [] -%} + {%- for json_path in json_path_list -%} + {%- if str_list.append(json_path.replace("'", "''").replace('"', '\\"')) -%} {%- endif -%} + {%- endfor -%} + {{ "'" ~ str_list|join("','") ~ "'" }} +{%- endmacro %} + {# json_extract ------------------------------------------------- #} {% macro json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} @@ -124,6 +133,14 @@ json_query({{ json_column }}, {{ format_json_path(json_path_list) }}) {%- endmacro %} +{% macro clickhouse__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} + {%- if from_table|string() == '' %} + JSONExtractRaw({{ json_column }}, {{ format_json_path(json_path_list) }}) + {% else %} + JSONExtractRaw({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }}) + {% endif -%} +{%- endmacro %} + {# json_extract_scalar ------------------------------------------------- #} {% macro json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} @@ -162,6 +179,10 @@ json_value({{ json_column }}, {{ format_json_path(json_path_list) }}) {%- endmacro %} +{% macro clickhouse__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} + JSONExtractRaw({{ json_column }}, {{ format_json_path(json_path_list) }}) +{%- endmacro %} + {# json_extract_array ------------------------------------------------- #} {% macro json_extract_array(json_column, json_path_list, normalized_json_path) -%} @@ -199,3 +220,7 @@ {% macro sqlserver__json_extract_array(json_column, json_path_list, normalized_json_path) -%} json_query({{ json_column }}, {{ format_json_path(json_path_list) }}) {%- endmacro %} + +{% macro clickhouse__json_extract_array(json_column, json_path_list, normalized_json_path) -%} + JSONExtractArrayRaw({{ json_column }}, {{ format_json_path(json_path_list) }}) +{%- endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/quote.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/quote.sql index c751abfaeb21e..87862498cfc5f 100644 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/quote.sql +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/quote.sql @@ -10,3 +10,7 @@ {% macro oracle__quote(column_name) -%} {{ '\"' ~ column_name ~ '\"'}} {%- endmacro %} + +{% macro clickhouse__quote(column_name) -%} + {{ '\"' ~ column_name ~ '\"'}} +{%- endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/surrogate_key.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/surrogate_key.sql index a32b59b017745..9de2965409aad 100644 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/surrogate_key.sql +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/surrogate_key.sql @@ -11,3 +11,15 @@ {%- endfor %} ) {%- endmacro %} + +{% macro clickhouse__surrogate_key(field_list) -%} + assumeNotNull(hex(MD5( + {%- for field in field_list %} + {% if not loop.last %} + toString({{ field }}) || '~' || + {% 
else %} + toString({{ field }}) + {% endif %} + {%- endfor %} + ))) +{%- endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/docker-compose.build.yaml b/airbyte-integrations/bases/base-normalization/docker-compose.build.yaml index f6d1df902699b..16948b4a22211 100644 --- a/airbyte-integrations/bases/base-normalization/docker-compose.build.yaml +++ b/airbyte-integrations/bases/base-normalization/docker-compose.build.yaml @@ -29,3 +29,10 @@ services: context: . labels: io.airbyte.git-revision: ${GIT_REVISION} + normalization-clickhouse: + image: airbyte/normalization-clickhouse:${VERSION} + build: + dockerfile: clickhouse.Dockerfile + context: . + labels: + io.airbyte.git-revision: ${GIT_REVISION} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py b/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py index c7b26daf6bdd7..bce4152dc7b3f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py +++ b/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py @@ -13,6 +13,7 @@ import sys import threading import time +from copy import copy from typing import Any, Dict, List from normalization.destination_type import DestinationType @@ -22,6 +23,7 @@ NORMALIZATION_TEST_MSSQL_DB_PORT = "NORMALIZATION_TEST_MSSQL_DB_PORT" NORMALIZATION_TEST_MYSQL_DB_PORT = "NORMALIZATION_TEST_MYSQL_DB_PORT" NORMALIZATION_TEST_POSTGRES_DB_PORT = "NORMALIZATION_TEST_POSTGRES_DB_PORT" +NORMALIZATION_TEST_CLICKHOUSE_DB_PORT = "NORMALIZATION_TEST_CLICKHOUSE_DB_PORT" class DbtIntegrationTest(object): @@ -48,6 +50,8 @@ def setup_db(self, destinations_to_test: List[str]): self.setup_mysql_db() if DestinationType.MSSQL.value in destinations_to_test: self.setup_mssql_db() + if DestinationType.CLICKHOUSE.value in destinations_to_test: + self.setup_clickhouse_db() def setup_postgres_db(self): start_db = True @@ -209,6 +213,73 @@ def setup_mssql_db(self): with open("../secrets/mssql.json", "w") as fh: fh.write(json.dumps(config)) + def setup_clickhouse_db(self): + """ + ClickHouse official JDBC driver use HTTP port 8123, while Python ClickHouse + driver uses native port 9000, so we need to open both ports for destination + connector and dbt container respectively. 
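That port split can be checked by hand; a minimal sketch against a local container, assuming the third-party requests and clickhouse-driver packages are installed (neither is part of this change):

    # Minimal sketch: exercise both ClickHouse interfaces of a local container.
    import requests                       # HTTP interface, i.e. the JDBC port 8123
    from clickhouse_driver import Client  # native TCP interface, port 9000

    print(requests.get("http://localhost:8123", params={"query": "SELECT 1"}).text)
    print(Client(host="localhost", port=9000).execute("SELECT 1"))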
+ + Ref: https://altinity.com/blog/2019/3/15/clickhouse-networking-part-1 + """ + start_db = True + if os.getenv(NORMALIZATION_TEST_CLICKHOUSE_DB_PORT): + port = int(os.getenv(NORMALIZATION_TEST_CLICKHOUSE_DB_PORT)) + start_db = False + else: + port = self.find_free_port() + config = { + "host": "localhost", + "port": port, + "database": self.target_schema, + "username": "default", + "password": "", + "ssl": False, + } + if start_db: + self.db_names.append("clickhouse") + print("Starting localhost clickhouse container for tests") + commands = [ + "docker", + "run", + "--rm", + "--name", + f"{self.container_prefix}_clickhouse", + "--ulimit", + "nofile=262144:262144", + "-p", + "9000:9000", # Python clickhouse driver use native port + "-p", + f"{config['port']}:8123", # clickhouse JDBC driver use HTTP port + "-d", + # so far, only the latest version ClickHouse server image turned on + # window functions + "clickhouse/clickhouse-server:latest", + ] + print("Executing: ", " ".join(commands)) + subprocess.call(commands) + print("....Waiting for ClickHouse DB to start...15 sec") + time.sleep(15) + # Run additional commands to prepare the table + command_create_db = [ + "docker", + "run", + "--rm", + "--link", + f"{self.container_prefix}_clickhouse:clickhouse-server", + "clickhouse/clickhouse-client:21.8.10.19", + "--host", + "clickhouse-server", + "--query", + f"CREATE DATABASE IF NOT EXISTS {config['database']}", + ] + # create test db + print("Executing: ", " ".join(command_create_db)) + subprocess.call(command_create_db) + if not os.path.exists("../secrets"): + os.makedirs("../secrets") + with open("../secrets/clickhouse.json", "w") as fh: + fh.write(json.dumps(config)) + @staticmethod def find_free_port(): """ @@ -257,7 +328,14 @@ def generate_profile_yaml_file(self, destination_type: DestinationType, test_roo profiles_config["database"] = self.target_schema else: profiles_config["schema"] = self.target_schema - profiles_yaml = config_generator.transform(destination_type, profiles_config) + if destination_type.value == DestinationType.CLICKHOUSE.value: + # Python ClickHouse driver uses native port 9000, which is different + # from official ClickHouse JDBC driver + clickhouse_config = copy(profiles_config) + clickhouse_config["port"] = 9000 + profiles_yaml = config_generator.transform(destination_type, clickhouse_config) + else: + profiles_yaml = config_generator.transform(destination_type, profiles_config) config_generator.write_yaml_config(test_root_dir, profiles_yaml, "profiles.yml") return profiles_config @@ -294,6 +372,8 @@ def get_normalization_image(destination_type: DestinationType) -> str: return "airbyte/normalization-mysql:dev" elif DestinationType.ORACLE.value == destination_type.value: return "airbyte/normalization-oracle:dev" + elif DestinationType.CLICKHOUSE.value == destination_type.value: + return "airbyte/normalization-clickhouse:dev" else: return "airbyte/normalization:dev" diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/dbt_project.yml new file mode 100755 index 0000000000000..37eca7fcd73f3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/dbt_project.yml @@ -0,0 +1,64 @@ +# This file is necessary to install dbt-utils with dbt deps +# the content will be 
overwritten by the transform function + +# Name your package! Package names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'airbyte_utils' +version: '1.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. Profiles contain +# database connection information, and should be configured in the ~/.dbt/profiles.yml file +profile: 'normalize' + +# These configurations specify where dbt should look for different types of files. +# The `source-paths` config, for example, states that source models can be found +# in the "models/" directory. You probably won't need to change these! +source-paths: ["models"] +docs-paths: ["docs"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +data-paths: ["data"] +macro-paths: ["macros"] + +target-path: "../build" # directory which will store compiled SQL files +log-path: "../logs" # directory which will store DBT logs +modules-path: "/tmp/dbt_modules" # directory which will store external DBT dependencies + +clean-targets: # directories to be removed by `dbt clean` + - "build" + - "dbt_modules" + +quoting: + database: true +# Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) +# all schemas should be unquoted + schema: false + identifier: true + +# You can define configurations for models in the `source-paths` directory here. +# Using these configurations, you can enable or disable models, change how they +# are materialized, and more! +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + # schema change test isn't supported in ClickHouse yet + +on_schema_change: "ignore" + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view + +dispatch: + - macro_namespace: dbt_utils + search_order: ['airbyte_utils', 'dbt_utils'] diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..749e5b38562b4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -0,0 +1,107 @@ + + + + + create table test_normalization.dedup_cdc_excluded_scd + + + + engine = MergeTree() + + order by (tuple()) + + as ( + +with + +input_data as ( + select * + from _airbyte_test_normalization.dedup_cdc_excluded_ab3 + -- dedup_cdc_excluded from test_normalization._airbyte_raw_dedup_cdc_excluded +), + +input_data_with_active_row_num as ( + select *, + row_number() over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc, _ab_cdc_updated_at desc + ) as _airbyte_active_row_num + from input_data +), +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their 
primary key + select + assumeNotNull(hex(MD5( + + toString(id) + + ))) as _airbyte_unique_key, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_emitted_at as _airbyte_start_at, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc, _ab_cdc_updated_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + case when _airbyte_active_row_num = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_cdc_excluded_hashid + from input_data_with_active_row_num +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, 'String'), accurateCastOrNull(_ab_cdc_updated_at, 'String') + order by _airbyte_ab_id + ) as _airbyte_row_num, + assumeNotNull(hex(MD5( + + toString(_airbyte_unique_key) || '~' || + + + toString(_airbyte_start_at) || '~' || + + + toString(_airbyte_emitted_at) || '~' || + + + toString(_ab_cdc_deleted_at) || '~' || + + + toString(_ab_cdc_updated_at) + + ))) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_dedup_cdc_excluded_hashid +from dedup_data where _airbyte_row_num = 1 + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..49c1843204fee --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,113 @@ + + + + + create table test_normalization.dedup_exchange_rate_scd + + + + engine = MergeTree() + + order by (tuple()) + + as ( + +with + +input_data as ( + select * + from _airbyte_test_normalization.dedup_exchange_rate_ab3 + -- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate +), + +input_data_with_active_row_num as ( + select *, + row_number() over ( + partition by id, currency, cast(NZD as String) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) as _airbyte_active_row_num + from input_data +), +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(currency) || '~' || + + + toString(NZD) + + ))) as _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + "HKD@spéçiäl & characters", + HKD_special___characters, + NZD, + USD, + date as _airbyte_start_at, + anyOrNull(date) over ( + partition by id, currency, 
cast(NZD as String) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data_with_active_row_num +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + order by _airbyte_ab_id + ) as _airbyte_row_num, + assumeNotNull(hex(MD5( + + toString(_airbyte_unique_key) || '~' || + + + toString(_airbyte_start_at) || '~' || + + + toString(_airbyte_emitted_at) + + ))) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + currency, + date, + timestamp_col, + "HKD@spéçiäl & characters", + HKD_special___characters, + NZD, + USD, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..61f46aa4665c4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -0,0 +1,93 @@ + + + + + create table test_normalization.renamed_dedup_cdc_excluded_scd + + + + engine = MergeTree() + + order by (tuple()) + + as ( + +with + +input_data as ( + select * + from _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab3 + -- renamed_dedup_cdc_excluded from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded +), + +input_data_with_active_row_num as ( + select *, + row_number() over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + ) as _airbyte_active_row_num + from input_data +), +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + assumeNotNull(hex(MD5( + + toString(id) + + ))) as _airbyte_unique_key, + id, + _airbyte_emitted_at as _airbyte_start_at, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_renamed_dedup_cdc_excluded_hashid + from input_data_with_active_row_num +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by _airbyte_unique_key, 
_airbyte_start_at, _airbyte_emitted_at + order by _airbyte_ab_id + ) as _airbyte_row_num, + assumeNotNull(hex(MD5( + + toString(_airbyte_unique_key) || '~' || + + + toString(_airbyte_start_at) || '~' || + + + toString(_airbyte_emitted_at) + + ))) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_renamed_dedup_cdc_excluded_hashid +from dedup_data where _airbyte_row_num = 1 + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..4b2055de8600d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql @@ -0,0 +1,33 @@ + + + + + create table test_normalization.dedup_cdc_excluded + + + + engine = MergeTree() + + order by (tuple()) + + as ( + +-- Final base SQL model +select + _airbyte_unique_key, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_dedup_cdc_excluded_hashid +from test_normalization.dedup_cdc_excluded_scd +-- dedup_cdc_excluded from test_normalization._airbyte_raw_dedup_cdc_excluded +where 1 = 1 +and _airbyte_active_row = 1 + + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..d1dc1abdc7142 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,36 @@ + + + + + create table test_normalization.dedup_exchange_rate + + + + engine = MergeTree() + + order by (tuple()) + + as ( + +-- Final base SQL model +select + _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + "HKD@spéçiäl & characters", + HKD_special___characters, + NZD, + USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from test_normalization.dedup_exchange_rate_scd +-- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate +where 1 = 1 +and _airbyte_active_row = 1 + + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..5295b9baa8dae --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -0,0 +1,29 @@ + + + + + create table test_normalization.renamed_dedup_cdc_excluded + + + + engine = MergeTree() + + order by (tuple()) + + as ( + +-- Final base SQL model +select + _airbyte_unique_key, + id, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_renamed_dedup_cdc_excluded_hashid +from test_normalization.renamed_dedup_cdc_excluded_scd +-- renamed_dedup_cdc_excluded from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded +where 1 = 1 +and _airbyte_active_row = 1 + + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..0eb15bc43e455 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,115 @@ + + + + create table test_normalization.exchange_rate__dbt_tmp + + + + engine = MergeTree() + + order by (tuple()) + + as ( + +with __dbt__cte__exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'currency') as currency, + JSONExtractRaw(_airbyte_data, 'date') as date, + JSONExtractRaw(_airbyte_data, 'timestamp_col') as timestamp_col, + JSONExtractRaw(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", + JSONExtractRaw(_airbyte_data, 'HKD_special___characters') as HKD_special___characters, + JSONExtractRaw(_airbyte_data, 'NZD') as NZD, + JSONExtractRaw(_airbyte_data, 'USD') as USD, + JSONExtractRaw(_airbyte_data, 'column`_''with\"_quotes') as "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, + parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, ''))) as date, + parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, + accurateCastOrNull("HKD@spéçiäl & characters", ' + Float64 +') as "HKD@spéçiäl & characters", + nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, + accurateCastOrNull(NZD, ' + Float64 +') as NZD, + accurateCastOrNull(USD, ' + Float64 +') as USD, + 
nullif(accurateCastOrNull(trim(BOTH '"' from "column`_'with""_quotes"), 'String'), 'null') as "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__exchange_rate_ab1 +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(currency) || '~' || + + + toString(date) || '~' || + + + toString(timestamp_col) || '~' || + + + toString("HKD@spéçiäl & characters") || '~' || + + + toString(HKD_special___characters) || '~' || + + + toString(NZD) || '~' || + + + toString(USD) || '~' || + + + toString("column`_'with""_quotes") + + ))) as _airbyte_exchange_rate_hashid, + tmp.* +from __dbt__cte__exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +select + id, + currency, + date, + timestamp_col, + "HKD@spéçiäl & characters", + HKD_special___characters, + NZD, + USD, + "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from __dbt__cte__exchange_rate_ab3 +-- exchange_rate from test_normalization._airbyte_raw_exchange_rate +where 1 = 1 + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql new file mode 100644 index 0000000000000..fe2bf632dbf20 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql @@ -0,0 +1,71 @@ + + + create view _airbyte_test_normalization.dedup_cdc_excluded_ab3__dbt_tmp + + as ( + +with __dbt__cte__dedup_cdc_excluded_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'name') as name, + JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, + JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, + JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_dedup_cdc_excluded as table_alias +-- dedup_cdc_excluded +where 1 = 1 + +), __dbt__cte__dedup_cdc_excluded_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, + accurateCastOrNull(_ab_cdc_lsn, ' + Float64 +') as _ab_cdc_lsn, + accurateCastOrNull(_ab_cdc_updated_at, ' + Float64 +') as _ab_cdc_updated_at, + accurateCastOrNull(_ab_cdc_deleted_at, ' + Float64 +') as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__dedup_cdc_excluded_ab1 +-- dedup_cdc_excluded +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) || '~' 
|| + + + toString(name) || '~' || + + + toString(_ab_cdc_lsn) || '~' || + + + toString(_ab_cdc_updated_at) || '~' || + + + toString(_ab_cdc_deleted_at) + + ))) as _airbyte_dedup_cdc_excluded_hashid, + tmp.* +from __dbt__cte__dedup_cdc_excluded_ab2 tmp +-- dedup_cdc_excluded +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql new file mode 100644 index 0000000000000..28abd1a79a7f2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql @@ -0,0 +1,86 @@ + + + create view _airbyte_test_normalization.dedup_exchange_rate_ab3__dbt_tmp + + as ( + +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'currency') as currency, + JSONExtractRaw(_airbyte_data, 'date') as date, + JSONExtractRaw(_airbyte_data, 'timestamp_col') as timestamp_col, + JSONExtractRaw(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", + JSONExtractRaw(_airbyte_data, 'HKD_special___characters') as HKD_special___characters, + JSONExtractRaw(_airbyte_data, 'NZD') as NZD, + JSONExtractRaw(_airbyte_data, 'USD') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, + parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, ''))) as date, + parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, + accurateCastOrNull("HKD@spéçiäl & characters", ' + Float64 +') as "HKD@spéçiäl & characters", + nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, + accurateCastOrNull(NZD, ' + Float64 +') as NZD, + accurateCastOrNull(USD, ' + Float64 +') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(currency) || '~' || + + + toString(date) || '~' || + + + toString(timestamp_col) || '~' || + + + toString("HKD@spéçiäl & characters") || '~' || + + + toString(HKD_special___characters) || '~' || + + + toString(NZD) || '~' || + + + toString(USD) + + ))) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from __dbt__cte__dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql new file mode 100644 index 0000000000000..9f515f09a4a44 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql @@ -0,0 +1,78 @@ + + + create view _airbyte_test_normalization.pos_dedup_cdcx_ab3__dbt_tmp + + as ( + +with __dbt__cte__pos_dedup_cdcx_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'name') as name, + JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, + JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, + JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, + JSONExtractRaw(_airbyte_data, '_ab_cdc_log_pos') as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_pos_dedup_cdcx as table_alias +-- pos_dedup_cdcx +where 1 = 1 + +), __dbt__cte__pos_dedup_cdcx_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, + accurateCastOrNull(_ab_cdc_lsn, ' + Float64 +') as _ab_cdc_lsn, + accurateCastOrNull(_ab_cdc_updated_at, ' + Float64 +') as _ab_cdc_updated_at, + accurateCastOrNull(_ab_cdc_deleted_at, ' + Float64 +') as _ab_cdc_deleted_at, + accurateCastOrNull(_ab_cdc_log_pos, ' + Float64 +') as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__pos_dedup_cdcx_ab1 +-- pos_dedup_cdcx +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(name) || '~' || + + + toString(_ab_cdc_lsn) || '~' || + + + toString(_ab_cdc_updated_at) || '~' || + + + toString(_ab_cdc_deleted_at) || '~' || + + + toString(_ab_cdc_log_pos) + + ))) as _airbyte_pos_dedup_cdcx_hashid, + tmp.* +from __dbt__cte__pos_dedup_cdcx_ab2 tmp +-- pos_dedup_cdcx +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql new file mode 100644 index 0000000000000..43c5b8ad9e18a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql @@ -0,0 +1,45 @@ + + + create view _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab3__dbt_tmp + + as ( + +with 
__dbt__cte__renamed_dedup_cdc_excluded_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded as table_alias +-- renamed_dedup_cdc_excluded +where 1 = 1 + +), __dbt__cte__renamed_dedup_cdc_excluded_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__renamed_dedup_cdc_excluded_ab1 +-- renamed_dedup_cdc_excluded +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) + + ))) as _airbyte_renamed_dedup_cdc_excluded_hashid, + tmp.* +from __dbt__cte__renamed_dedup_cdc_excluded_ab2 tmp +-- renamed_dedup_cdc_excluded +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql new file mode 100644 index 0000000000000..ad250a2de1969 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql @@ -0,0 +1,20 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['name'], ['name']) }} as name, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_lsn'], ['_ab_cdc_lsn']) }} as _ab_cdc_lsn, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_deleted_at'], ['_ab_cdc_deleted_at']) }} as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} as table_alias +-- dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql new file mode 100644 index 0000000000000..b4921f53776b7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql @@ -0,0 +1,20 @@ +{{ config( + 
unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from name), '{{ dbt_utils.type_string() }}'), 'null') as name, + accurateCastOrNull(_ab_cdc_lsn, '{{ dbt_utils.type_float() }}') as _ab_cdc_lsn, + accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_float() }}') as _ab_cdc_updated_at, + accurateCastOrNull(_ab_cdc_deleted_at, '{{ dbt_utils.type_float() }}') as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('dedup_cdc_excluded_ab1') }} +-- dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql new file mode 100644 index 0000000000000..bdfc716769aee --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -0,0 +1,23 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, + {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, + {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ quote('HKD@spéçiäl & characters') }}, + {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as HKD_special___characters, + {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as NZD, + {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql new file mode 100644 index 0000000000000..351ccad8f300e --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -0,0 +1,23 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from currency), '{{ dbt_utils.type_string() }}'), 'null') as currency, + parseDateTimeBestEffortOrNull(trim(BOTH '"' from {{ empty_string_to_null('date') }})) as date, + parseDateTime64BestEffortOrNull(trim(BOTH '"' from {{ empty_string_to_null('timestamp_col') }})) as timestamp_col, + accurateCastOrNull({{ quote('HKD@spéçiäl & characters') }}, '{{ dbt_utils.type_float() }}') as {{ quote('HKD@spéçiäl & characters') }}, + nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), '{{ dbt_utils.type_string() }}'), 'null') as HKD_special___characters, + accurateCastOrNull(NZD, '{{ dbt_utils.type_float() }}') as NZD, + accurateCastOrNull(USD, '{{ dbt_utils.type_float() }}') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('dedup_exchange_rate_ab1') }} +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql new file mode 100644 index 0000000000000..59f1c4bcfba0c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql @@ -0,0 +1,23 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, + {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, + {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ quote('HKD@spéçiäl & characters') }}, + {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as HKD_special___characters, + {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as NZD, + {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as USD, + {{ json_extract_scalar('_airbyte_data', ['column`_\'with"_quotes'], ['column___with__quotes']) }} as {{ quote('column`_\'with""_quotes') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} as table_alias +-- exchange_rate +where 1 = 1 + diff 
--git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql new file mode 100644 index 0000000000000..a48a14a7aecc3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql @@ -0,0 +1,23 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from currency), '{{ dbt_utils.type_string() }}'), 'null') as currency, + parseDateTimeBestEffortOrNull(trim(BOTH '"' from {{ empty_string_to_null('date') }})) as date, + parseDateTime64BestEffortOrNull(trim(BOTH '"' from {{ empty_string_to_null('timestamp_col') }})) as timestamp_col, + accurateCastOrNull({{ quote('HKD@spéçiäl & characters') }}, '{{ dbt_utils.type_float() }}') as {{ quote('HKD@spéçiäl & characters') }}, + nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), '{{ dbt_utils.type_string() }}'), 'null') as HKD_special___characters, + accurateCastOrNull(NZD, '{{ dbt_utils.type_float() }}') as NZD, + accurateCastOrNull(USD, '{{ dbt_utils.type_float() }}') as USD, + nullif(accurateCastOrNull(trim(BOTH '"' from {{ quote('column`_\'with""_quotes') }}), '{{ dbt_utils.type_string() }}'), 'null') as {{ quote('column`_\'with""_quotes') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('exchange_rate_ab1') }} +-- exchange_rate +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql new file mode 100644 index 0000000000000..d6593d4eb8f1a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql @@ -0,0 +1,23 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'date', + 'timestamp_col', + quote('HKD@spéçiäl & characters'), + 'HKD_special___characters', + 'NZD', + 'USD', + quote('column`_\'with""_quotes'), + ]) }} as _airbyte_exchange_rate_hashid, + tmp.* +from {{ ref('exchange_rate_ab2') }} tmp +-- exchange_rate +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql new file mode 100644 index 0000000000000..182bcd7dbfd4f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql @@ -0,0 +1,21 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['name'], ['name']) }} as name, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_lsn'], ['_ab_cdc_lsn']) }} as _ab_cdc_lsn, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_deleted_at'], ['_ab_cdc_deleted_at']) }} as _ab_cdc_deleted_at, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_log_pos'], ['_ab_cdc_log_pos']) }} as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} as table_alias +-- pos_dedup_cdcx +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql new file mode 100644 index 0000000000000..3769adf4d02e0 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql @@ -0,0 +1,21 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from name), '{{ dbt_utils.type_string() }}'), 'null') as name, + accurateCastOrNull(_ab_cdc_lsn, '{{ dbt_utils.type_float() }}') as _ab_cdc_lsn, + accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_float() }}') as _ab_cdc_updated_at, + accurateCastOrNull(_ab_cdc_deleted_at, '{{ dbt_utils.type_float() }}') as _ab_cdc_deleted_at, + accurateCastOrNull(_ab_cdc_log_pos, '{{ dbt_utils.type_float() }}') as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('pos_dedup_cdcx_ab1') }} +-- pos_dedup_cdcx +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql new file mode 100644 index 0000000000000..4504a7bbffa32 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql @@ -0,0 +1,16 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} as table_alias +-- renamed_dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql new file mode 100644 index 0000000000000..8b248db9590f7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql @@ -0,0 +1,16 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('renamed_dedup_cdc_excluded_ab1') }} +-- renamed_dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..eca9d38763a06 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -0,0 +1,117 @@ +{{ config( + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_cdc_excluded_ab3') }} + -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at') }} +), 
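A note on the ClickHouse idiom the generated _ab1/_ab2 models above rely on: JSONExtractRaw returns the raw JSON token, so string values keep their surrounding double quotes, and the cast step therefore trims the quotes before applying accurateCastOrNull, which returns NULL instead of failing on unparseable input. A minimal standalone sketch of that extract-then-cast pattern (hypothetical one-row input, not part of the generated models):

    select
        accurateCastOrNull(JSONExtractRaw(j, 'id'), 'Int64') as id,  -- 123
        nullif(
            accurateCastOrNull(trim(BOTH '"' from JSONExtractRaw(j, 'name')), 'String'),
            'null'
        ) as name  -- 'bob'; a JSON null arrives as the raw string 'null' and is mapped to NULL here
    from (select '{"id": 123, "name": "bob"}' as j);
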
+new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key + from new_data +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_cdc_excluded_ab3'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro) + --left join {{ ref('dedup_cdc_excluded_ab3') }} as inc_data on 1 = 0 + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_cdc_excluded_ab3')) }} from new_data + union all + select {{ dbt_utils.star(ref('dedup_cdc_excluded_ab3')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_cdc_excluded_ab3') }} + -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} +), +{% endif %} +input_data_with_active_row_num as ( + select *, + row_number() over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc, _ab_cdc_updated_at desc + ) as _airbyte_active_row_num + from input_data +), +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_emitted_at as _airbyte_start_at, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc, _ab_cdc_updated_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + case when _airbyte_active_row_num = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_cdc_excluded_hashid + from input_data_with_active_row_num +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, '{{ dbt_utils.type_string() }}'), accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_string() }}') + order by _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at', '_ab_cdc_deleted_at', '_ab_cdc_updated_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_cdc_excluded_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..97757d03ce77d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,127 @@ +{{ config( + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_ab3') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at') }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'NZD', + ]) }} as _airbyte_unique_key + from new_data +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_exchange_rate_ab3'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro) + --left join {{ ref('dedup_exchange_rate_ab3') }} as inc_data on 1 = 0 + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_exchange_rate_ab3')) }} from new_data + union all + select {{ dbt_utils.star(ref('dedup_exchange_rate_ab3')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_exchange_rate_ab3') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +), +{% endif %} +input_data_with_active_row_num as ( + select *, + row_number() over ( + partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }}) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) as _airbyte_active_row_num + from input_data +), +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'NZD', + ]) }} as _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + {{ quote('HKD@spéçiäl & characters') }}, + HKD_special___characters, + NZD, + USD, + date as _airbyte_start_at, + anyOrNull(date) over ( + partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }}) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from 
input_data_with_active_row_num +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + order by _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + currency, + date, + timestamp_col, + {{ quote('HKD@spéçiäl & characters') }}, + HKD_special___characters, + NZD, + USD, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..c0dcee2b2ccbb --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -0,0 +1,109 @@ +{{ config( + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('renamed_dedup_cdc_excluded_ab3') }} + -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at') }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key + from new_data +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('renamed_dedup_cdc_excluded_ab3'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro) + --left join {{ ref('renamed_dedup_cdc_excluded_ab3') }} as inc_data on 1 = 0 + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_ab3')) }} from new_data + union all + select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_ab3')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('renamed_dedup_cdc_excluded_ab3') }} + -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} +), +{% endif %} +input_data_with_active_row_num as ( + select *, + row_number() over ( + 
partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + ) as _airbyte_active_row_num + from input_data +), +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key, + id, + _airbyte_emitted_at as _airbyte_start_at, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_renamed_dedup_cdc_excluded_hashid + from input_data_with_active_row_num +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + order by _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_renamed_dedup_cdc_excluded_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..086676f173719 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql @@ -0,0 +1,23 @@ +{{ config( + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +select + _airbyte_unique_key, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_cdc_excluded_hashid +from {{ ref('dedup_cdc_excluded_scd') }} +-- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..3fb8238f2479b --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,26 @@ +{{ config( + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +select + _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + {{ quote('HKD@spéçiäl & characters') }}, + HKD_special___characters, + NZD, + USD, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from {{ ref('dedup_exchange_rate_scd') }} +-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..8a8ff85f59024 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -0,0 +1,19 @@ +{{ config( + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +select + _airbyte_unique_key, + id, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_renamed_dedup_cdc_excluded_hashid +from {{ ref('renamed_dedup_cdc_excluded_scd') }} +-- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..c370f10264a3f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,24 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +select + id, + currency, + date, + timestamp_col, + {{ quote('HKD@spéçiäl & characters') }}, + HKD_special___characters, + NZD, + USD, + {{ quote('column`_\'with""_quotes') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from {{ ref('exchange_rate_ab3') }} +-- exchange_rate from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} +where 1 = 1 + diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql new file mode 100644 index 0000000000000..446204f691ebc --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql @@ -0,0 +1,20 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +select + {{ dbt_utils.surrogate_key([ + 'id', + 'name', + '_ab_cdc_lsn', + '_ab_cdc_updated_at', + '_ab_cdc_deleted_at', + ]) }} as _airbyte_dedup_cdc_excluded_hashid, + tmp.* +from {{ ref('dedup_cdc_excluded_ab2') }} tmp +-- dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql new file mode 100644 index 0000000000000..b14a1fb639b1b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql @@ -0,0 +1,23 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'date', + 'timestamp_col', + quote('HKD@spéçiäl & characters'), + 'HKD_special___characters', + 'NZD', + 'USD', + ]) }} as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from {{ ref('dedup_exchange_rate_ab2') }} tmp +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql new file mode 100644 index 0000000000000..dbe0c313b238b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql @@ -0,0 +1,21 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +select + {{ dbt_utils.surrogate_key([ + 'id', + 'name', + '_ab_cdc_lsn', + '_ab_cdc_updated_at', + '_ab_cdc_deleted_at', + '_ab_cdc_log_pos', + ]) }} as 
_airbyte_pos_dedup_cdcx_hashid, + tmp.* +from {{ ref('pos_dedup_cdcx_ab2') }} tmp +-- pos_dedup_cdcx +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql new file mode 100644 index 0000000000000..2356b929f1f38 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql @@ -0,0 +1,16 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_renamed_dedup_cdc_excluded_hashid, + tmp.* +from {{ ref('renamed_dedup_cdc_excluded_ab2') }} tmp +-- renamed_dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml new file mode 100644 index 0000000000000..45c338b893cab --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml @@ -0,0 +1,13 @@ +version: 2 +sources: +- name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_dedup_cdc_excluded + - name: _airbyte_raw_dedup_exchange_rate + - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_pos_dedup_cdcx + - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..029806e67c97d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -0,0 +1,5 @@ + + insert into test_normalization.dedup_cdc_excluded_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid") + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", 
"_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid" + from dedup_cdc_excluded_scd__dbt_tmp + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..25b31a3767bc3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,5 @@ + + insert into test_normalization.dedup_exchange_rate_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from dedup_exchange_rate_scd__dbt_tmp + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..0fc967c7e00a1 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -0,0 +1,5 @@ + + insert into test_normalization.renamed_dedup_cdc_excluded_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" + from renamed_dedup_cdc_excluded_scd__dbt_tmp + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..bd7ed508ea036 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql @@ -0,0 +1,5 @@ + + insert into test_normalization.dedup_cdc_excluded ("_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid") + select "_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid" + from dedup_cdc_excluded__dbt_tmp + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..f4ee25a87e974 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,5 @@ + + insert into test_normalization.dedup_exchange_rate ("_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + select "_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from dedup_exchange_rate__dbt_tmp + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..3ee365f07d589 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -0,0 +1,5 @@ + + insert into test_normalization.renamed_dedup_cdc_excluded ("_airbyte_unique_key", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + select "_airbyte_unique_key", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" + from renamed_dedup_cdc_excluded__dbt_tmp + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..0eb15bc43e455 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,115 @@ + + + + create table test_normalization.exchange_rate__dbt_tmp + + + + engine = MergeTree() + + order by (tuple()) + + as ( + +with __dbt__cte__exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'currency') as currency, + JSONExtractRaw(_airbyte_data, 'date') as date, + JSONExtractRaw(_airbyte_data, 'timestamp_col') as timestamp_col, + JSONExtractRaw(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", + JSONExtractRaw(_airbyte_data, 'HKD_special___characters') as HKD_special___characters, + JSONExtractRaw(_airbyte_data, 'NZD') as NZD, + JSONExtractRaw(_airbyte_data, 'USD') as USD, + JSONExtractRaw(_airbyte_data, 'column`_''with\"_quotes') as "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, + parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, ''))) as date, + parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, + accurateCastOrNull("HKD@spéçiäl & characters", ' + Float64 +') as "HKD@spéçiäl & characters", + nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, + accurateCastOrNull(NZD, ' + Float64 +') as NZD, + accurateCastOrNull(USD, ' + Float64 +') as USD, + nullif(accurateCastOrNull(trim(BOTH '"' from "column`_'with""_quotes"), 'String'), 'null') as "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__exchange_rate_ab1 +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(currency) || '~' || + + + toString(date) || '~' || + + + toString(timestamp_col) || '~' || + + + toString("HKD@spéçiäl & characters") || '~' || + + + toString(HKD_special___characters) || '~' || + + + toString(NZD) || '~' || + + + toString(USD) || '~' || + + + toString("column`_'with""_quotes") + + ))) as _airbyte_exchange_rate_hashid, + tmp.* +from __dbt__cte__exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +select + id, + currency, + date, + timestamp_col, + "HKD@spéçiäl & characters", + HKD_special___characters, + NZD, + USD, + "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from __dbt__cte__exchange_rate_ab3 +-- exchange_rate from 
test_normalization._airbyte_raw_exchange_rate +where 1 = 1 + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql new file mode 100644 index 0000000000000..fe2bf632dbf20 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql @@ -0,0 +1,71 @@ + + + create view _airbyte_test_normalization.dedup_cdc_excluded_ab3__dbt_tmp + + as ( + +with __dbt__cte__dedup_cdc_excluded_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'name') as name, + JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, + JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, + JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_dedup_cdc_excluded as table_alias +-- dedup_cdc_excluded +where 1 = 1 + +), __dbt__cte__dedup_cdc_excluded_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, + accurateCastOrNull(_ab_cdc_lsn, ' + Float64 +') as _ab_cdc_lsn, + accurateCastOrNull(_ab_cdc_updated_at, ' + Float64 +') as _ab_cdc_updated_at, + accurateCastOrNull(_ab_cdc_deleted_at, ' + Float64 +') as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__dedup_cdc_excluded_ab1 +-- dedup_cdc_excluded +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(name) || '~' || + + + toString(_ab_cdc_lsn) || '~' || + + + toString(_ab_cdc_updated_at) || '~' || + + + toString(_ab_cdc_deleted_at) + + ))) as _airbyte_dedup_cdc_excluded_hashid, + tmp.* +from __dbt__cte__dedup_cdc_excluded_ab2 tmp +-- dedup_cdc_excluded +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql new file mode 100644 index 0000000000000..28abd1a79a7f2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql @@ -0,0 +1,86 @@ + + + create view _airbyte_test_normalization.dedup_exchange_rate_ab3__dbt_tmp + + as ( + +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single 
column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'currency') as currency, + JSONExtractRaw(_airbyte_data, 'date') as date, + JSONExtractRaw(_airbyte_data, 'timestamp_col') as timestamp_col, + JSONExtractRaw(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", + JSONExtractRaw(_airbyte_data, 'HKD_special___characters') as HKD_special___characters, + JSONExtractRaw(_airbyte_data, 'NZD') as NZD, + JSONExtractRaw(_airbyte_data, 'USD') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, + parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, ''))) as date, + parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, + accurateCastOrNull("HKD@spéçiäl & characters", ' + Float64 +') as "HKD@spéçiäl & characters", + nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, + accurateCastOrNull(NZD, ' + Float64 +') as NZD, + accurateCastOrNull(USD, ' + Float64 +') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(currency) || '~' || + + + toString(date) || '~' || + + + toString(timestamp_col) || '~' || + + + toString("HKD@spéçiäl & characters") || '~' || + + + toString(HKD_special___characters) || '~' || + + + toString(NZD) || '~' || + + + toString(USD) + + ))) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from __dbt__cte__dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql new file mode 100644 index 0000000000000..9f515f09a4a44 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql @@ -0,0 +1,78 @@ + + + create view _airbyte_test_normalization.pos_dedup_cdcx_ab3__dbt_tmp + + as ( + +with __dbt__cte__pos_dedup_cdcx_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'name') as name, + JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, + JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, + JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, + 
JSONExtractRaw(_airbyte_data, '_ab_cdc_log_pos') as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_pos_dedup_cdcx as table_alias +-- pos_dedup_cdcx +where 1 = 1 + +), __dbt__cte__pos_dedup_cdcx_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, + accurateCastOrNull(_ab_cdc_lsn, ' + Float64 +') as _ab_cdc_lsn, + accurateCastOrNull(_ab_cdc_updated_at, ' + Float64 +') as _ab_cdc_updated_at, + accurateCastOrNull(_ab_cdc_deleted_at, ' + Float64 +') as _ab_cdc_deleted_at, + accurateCastOrNull(_ab_cdc_log_pos, ' + Float64 +') as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__pos_dedup_cdcx_ab1 +-- pos_dedup_cdcx +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(name) || '~' || + + + toString(_ab_cdc_lsn) || '~' || + + + toString(_ab_cdc_updated_at) || '~' || + + + toString(_ab_cdc_deleted_at) || '~' || + + + toString(_ab_cdc_log_pos) + + ))) as _airbyte_pos_dedup_cdcx_hashid, + tmp.* +from __dbt__cte__pos_dedup_cdcx_ab2 tmp +-- pos_dedup_cdcx +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql new file mode 100644 index 0000000000000..43c5b8ad9e18a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql @@ -0,0 +1,45 @@ + + + create view _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab3__dbt_tmp + + as ( + +with __dbt__cte__renamed_dedup_cdc_excluded_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded as table_alias +-- renamed_dedup_cdc_excluded +where 1 = 1 + +), __dbt__cte__renamed_dedup_cdc_excluded_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__renamed_dedup_cdc_excluded_ab1 +-- renamed_dedup_cdc_excluded +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) + + ))) as _airbyte_renamed_dedup_cdc_excluded_hashid, + tmp.* +from __dbt__cte__renamed_dedup_cdc_excluded_ab2 tmp +-- renamed_dedup_cdc_excluded +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/replace_identifiers.json 
b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/replace_identifiers.json index 827dd4fd1642c..ddb47f1fbbcb1 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/replace_identifiers.json +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/replace_identifiers.json @@ -38,5 +38,6 @@ "mssql": [ { "- HKD_special___characters": "- '\"HKD_special___characters\"'" }, { "!= HKD_special___characters": "!= \"HKD_special___characters\"" } - ] + ], + "clickhouse": [] } diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py b/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py index 5373473120ade..b411ae74d278f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py +++ b/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py @@ -68,7 +68,10 @@ def setup_test_path(request): def test_normalization(destination_type: DestinationType, test_resource_name: str, setup_test_path): if destination_type.value not in dbt_test_utils.get_test_targets(): pytest.skip(f"Destination {destination_type} is not in the NORMALIZATION_TEST_TARGET env variable") - if destination_type.value == DestinationType.ORACLE.value and test_resource_name == "test_nested_streams": + if ( + destination_type.value in (DestinationType.ORACLE.value, DestinationType.CLICKHOUSE.value) + and test_resource_name == "test_nested_streams" + ): pytest.skip(f"Destination {destination_type} does not support nested streams") target_schema = dbt_test_utils.target_schema @@ -127,6 +130,8 @@ def run_schema_change_normalization(destination_type: DestinationType, test_reso if destination_type.value in [DestinationType.MYSQL.value, DestinationType.ORACLE.value]: # TODO: upgrade dbt-adapter repositories to work with dbt 0.21.0+ (outside airbyte's control) pytest.skip(f"{destination_type} does not support schema change in incremental yet (requires dbt 0.21.0+)") + if destination_type.value in [DestinationType.SNOWFLAKE.value, DestinationType.CLICKHOUSE.value]: + pytest.skip(f"{destination_type} is disabled as it doesn't support schema change in incremental yet (column type changes)") if destination_type.value in [DestinationType.MSSQL.value, DestinationType.SNOWFLAKE.value]: # TODO: create/fix github issue in corresponding dbt-adapter repository to handle schema changes (outside airbyte's control) pytest.skip(f"{destination_type} is disabled as it doesn't fully support schema change in incremental yet") @@ -184,6 +189,9 @@ def setup_test_dir(destination_type: DestinationType, test_resource_name: str) - elif destination_type.value == DestinationType.ORACLE.value: copy_tree("../dbt-project-template-oracle", test_root_dir) dbt_project_yaml = "../dbt-project-template-oracle/dbt_project.yml" + elif destination_type.value == DestinationType.CLICKHOUSE.value: + copy_tree("../dbt-project-template-clickhouse", test_root_dir) + dbt_project_yaml = "../dbt-project-template-clickhouse/dbt_project.yml" dbt_test_utils.copy_replace(dbt_project_yaml, os.path.join(test_root_dir, "dbt_project.yml")) return test_root_dir diff --git a/airbyte-integrations/bases/base-normalization/normalization/destination_type.py b/airbyte-integrations/bases/base-normalization/normalization/destination_type.py index c25c8982f7047..ef66c789495af 100644 ---
a/airbyte-integrations/bases/base-normalization/normalization/destination_type.py +++ b/airbyte-integrations/bases/base-normalization/normalization/destination_type.py @@ -14,6 +14,7 @@ class DestinationType(Enum): POSTGRES = "postgres" REDSHIFT = "redshift" SNOWFLAKE = "snowflake" + CLICKHOUSE = "clickhouse" @classmethod def from_string(cls, string_value: str) -> "DestinationType": diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py index ac4609626159a..352fa8b9f93d1 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py @@ -25,6 +25,8 @@ DestinationType.ORACLE.value: 128, # https://docs.microsoft.com/en-us/sql/odbc/microsoft/column-name-limitations?view=sql-server-ver15 DestinationType.MSSQL.value: 64, + # https://stackoverflow.com/questions/68358686/what-is-the-maximum-length-of-a-column-in-clickhouse-can-it-be-modified + DestinationType.CLICKHOUSE.value: 63, } # DBT also needs to generate suffix to table names, so we need to make sure it has enough characters to do so... @@ -166,6 +168,8 @@ def __normalize_identifier_name( if self.destination_type == DestinationType.ORACLE: # Oracle dbt lib doesn't implemented adapter quote yet. result = f"quote('{result}')" + elif self.destination_type == DestinationType.CLICKHOUSE: + result = f"quote('{result}')" else: result = f"adapter.quote('{result}')" if not in_jinja: @@ -213,6 +217,8 @@ def __normalize_identifier_case(self, input_name: str, is_quoted: bool = False) result = input_name.lower() else: result = input_name.upper() + elif self.destination_type.value == DestinationType.CLICKHOUSE.value: + pass else: raise KeyError(f"Unknown destination type {self.destination_type}") return result diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py index 7f69a36108837..f23d6e607e38f 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py @@ -3,6 +3,8 @@ # +from typing import Set + from normalization import DestinationType # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords @@ -2533,6 +2535,10 @@ "REGR_SYY", } +# In ClickHouse, keywords are not reserved. 
+# Ref: https://clickhouse.com/docs/en/sql-reference/syntax/#syntax-keywords +CLICKHOUSE: Set[str] = set() + RESERVED_KEYWORDS = { DestinationType.BIGQUERY.value: BIGQUERY, DestinationType.POSTGRES.value: POSTGRES, @@ -2541,6 +2547,7 @@ DestinationType.MYSQL.value: MYSQL, DestinationType.ORACLE.value: ORACLE, DestinationType.MSSQL.value: MSSQL, + DestinationType.CLICKHOUSE.value: CLICKHOUSE, } diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 9eee220116b74..e41b683b6b1bc 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -509,6 +509,9 @@ def cast_property_type(self, property_name: str, column_name: str, jinja_column: # in case of datetime, we don't need to use [cast] function, use try_parse instead. sql_type = jinja_call("type_timestamp_with_timezone()") return f"try_parse({replace_operation} as {sql_type}) as {column_name}" + if self.destination_type == DestinationType.CLICKHOUSE: + sql_type = jinja_call("type_timestamp_with_timezone()") + return f"parseDateTime64BestEffortOrNull(trim(BOTH '\"' from {replace_operation})) as {column_name}" # in all other cases sql_type = jinja_call("type_timestamp_with_timezone()") return f"cast({replace_operation} as {sql_type}) as {column_name}" @@ -521,16 +524,26 @@ def cast_property_type(self, property_name: str, column_name: str, jinja_column: # in case of date, we don't need to use [cast] function, use try_parse instead. sql_type = jinja_call("type_date()") return f"try_parse({replace_operation} as {sql_type}) as {column_name}" + if self.destination_type == DestinationType.CLICKHOUSE: + sql_type = jinja_call("type_date()") + return f"parseDateTimeBestEffortOrNull(trim(BOTH '\"' from {replace_operation})) as {column_name}" # in all other cases sql_type = jinja_call("type_date()") return f"cast({replace_operation} as {sql_type}) as {column_name}" elif is_string(definition["type"]): sql_type = jinja_call("dbt_utils.type_string()") + if self.destination_type == DestinationType.CLICKHOUSE: + trimmed_column_name = f"trim(BOTH '\"' from {column_name})" + sql_type = f"'{sql_type}'" + return f"nullif(accurateCastOrNull({trimmed_column_name}, {sql_type}), 'null') as {column_name}" else: print(f"WARN: Unknown type {definition['type']} for column {property_name} at {self.current_json_path()}") return column_name - return f"cast({column_name} as {sql_type}) as {column_name}" + if self.destination_type == DestinationType.CLICKHOUSE: + return f"accurateCastOrNull({column_name}, '{sql_type}') as {column_name}" + else: + return f"cast({column_name} as {sql_type}) as {column_name}" @staticmethod def generate_mysql_date_format_statement(column_name: str) -> str: @@ -671,7 +684,7 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup -- make a join with new_data using primary key to filter active data that need to be updated only join new_data_ids on this_data.{{ unique_key }} = new_data_ids.{{ unique_key }} -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) - left join empty_new_data as inc_data on this_data.{{ col_ab_id }} = inc_data.{{ col_ab_id }} + {{ enable_left_join_null }}left join empty_new_data as inc_data on this_data.{{ 
col_ab_id }} = inc_data.{{ col_ab_id }} where {{ active_row }} = 1 ), input_data as ( @@ -686,6 +699,17 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup {{ sql_table_comment }} ), {{ '{% endif %}' }} +input_data_with_active_row_num as ( + select *, + row_number() over ( + partition by {{ primary_key_partition | join(", ") }} + order by + {{ cursor_field }} {{ order_null }}, + {{ cursor_field }} desc, + {{ col_emitted_at }} desc{{ cdc_updated_at_order }} + ) as _airbyte_active_row_num + from input_data +), scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select @@ -701,24 +725,19 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup {{ field }}, {%- endfor %} {{ cursor_field }} as {{ airbyte_start_at }}, - lag({{ cursor_field }}) over ( + {{ lag_begin }}({{ cursor_field }}) over ( partition by {{ primary_key_partition | join(", ") }} order by {{ cursor_field }} {{ order_null }}, {{ cursor_field }} desc, {{ col_emitted_at }} desc{{ cdc_updated_at_order }} + {{ lag_end }} ) as {{ airbyte_end_at }}, - case when row_number() over ( - partition by {{ primary_key_partition | join(", ") }} - order by - {{ cursor_field }} {{ order_null }}, - {{ cursor_field }} desc, - {{ col_emitted_at }} desc{{ cdc_updated_at_order }} - ) = 1{{ cdc_active_row }} then 1 else 0 end as {{ active_row }}, + case when _airbyte_active_row_num = 1{{ cdc_active_row }} then 1 else 0 end as {{ active_row }}, {{ col_ab_id }}, {{ col_emitted_at }}, {{ hash_id }} - from input_data + from input_data_with_active_row_num ), dedup_data as ( select @@ -763,6 +782,24 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup # SQL Server treats NULL values as the lowest values, then sorted in ascending order, NULLs come first. 
order_null = "desc" + lag_begin = "lag" + lag_end = "" + if self.destination_type == DestinationType.CLICKHOUSE: + # ClickHouse doesn't support lag() yet, this is a workaround solution + # Ref: https://clickhouse.com/docs/en/sql-reference/window-functions/ + lag_begin = "anyOrNull" + lag_end = "ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING" + + enable_left_join_null = "" + cast_begin = "cast(" + cast_as = " as " + cast_end = ")" + if self.destination_type == DestinationType.CLICKHOUSE: + enable_left_join_null = "--" + cast_begin = "accurateCastOrNull(" + cast_as = ", '" + cast_end = "')" + # TODO move all cdc columns out of scd models cdc_active_row_pattern = "" cdc_updated_order_pattern = "" @@ -776,10 +813,12 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup cdc_active_row_pattern = f" and {col_cdc_deleted_at} is null" cdc_updated_order_pattern = f", {col_cdc_updated_at} desc" cdc_cols = ( - f", cast({col_cdc_deleted_at} as " - + "{{ dbt_utils.type_string() }})" - + f", cast({col_cdc_updated_at} as " - + "{{ dbt_utils.type_string() }})" + f", {cast_begin}{col_cdc_deleted_at}{cast_as}" + + "{{ dbt_utils.type_string() }}" + + f"{cast_end}" + + f", {cast_begin}{col_cdc_updated_at}{cast_as}" + + "{{ dbt_utils.type_string() }}" + + f"{cast_end}" ) quoted_cdc_cols = f", {quoted_col_cdc_deleted_at}, {quoted_col_cdc_updated_at}" @@ -787,7 +826,7 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup col_cdc_log_pos = self.name_transformer.normalize_column_name("_ab_cdc_log_pos") quoted_col_cdc_log_pos = self.name_transformer.normalize_column_name("_ab_cdc_log_pos", in_jinja=True) cdc_updated_order_pattern += f", {col_cdc_log_pos} desc" - cdc_cols += f", cast({col_cdc_log_pos} as " + "{{ dbt_utils.type_string() }})" + cdc_cols += f", {cast_begin}{col_cdc_log_pos}{cast_as}" + "{{ dbt_utils.type_string() }}" + f"{cast_end}" quoted_cdc_cols += f", {quoted_col_cdc_log_pos}" sql = template.render( @@ -817,6 +856,9 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup cdc_updated_at_order=cdc_updated_order_pattern, cdc_cols=cdc_cols, quoted_cdc_cols=quoted_cdc_cols, + lag_begin=lag_begin, + lag_end=lag_end, + enable_left_join_null=enable_left_join_null, ) return sql diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_config/transform.py b/airbyte-integrations/bases/base-normalization/normalization/transform_config/transform.py index e6b9315cd21ce..d45cbc0623fb0 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_config/transform.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_config/transform.py @@ -22,6 +22,7 @@ class DestinationType(Enum): mysql = "mysql" oracle = "oracle" mssql = "mssql" + clickhouse = "clickhouse" class TransformConfig: @@ -66,6 +67,7 @@ def transform(self, integration_type: DestinationType, config: Dict[str, Any]): DestinationType.mysql.value: self.transform_mysql, DestinationType.oracle.value: self.transform_oracle, DestinationType.mssql.value: self.transform_mssql, + DestinationType.clickhouse.value: self.transform_clickhouse, }[integration_type.value](config) # merge pre-populated base_profile with destination-specific configuration. 
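A note for reviewers on the `TransformConfig` dispatch above: each destination entry maps to a transform that returns the destination-specific portion of a dbt profile, which is then merged with the pre-populated base profile. The sketch below shows the shape this plausibly produces for ClickHouse; the field mapping mirrors `transform_clickhouse` in the next hunk, while the `base_profile` contents, the `normalize` profile name, and the `threads` value are illustrative assumptions. One observation: `transform_clickhouse` indexes `config["password"]` directly even though `password` is not in the spec's `required` list, so a password-less config would raise `KeyError`; `config.get("password", "")` would be safer.

```python
# Sketch only: how the dispatched ClickHouse transform's output could be merged
# into a dbt profiles.yml target. base_profile and the "normalize" profile name
# are illustrative assumptions, not taken from this PR.
import yaml  # PyYAML


def transform_clickhouse(config: dict) -> dict:
    # Mirrors the field mapping in the next hunk, with the safer password
    # lookup discussed above.
    return {
        "type": "clickhouse",
        "host": config["host"],
        "port": config["port"],
        "schema": config["database"],
        "user": config["username"],
        "password": config.get("password", ""),
    }


base_profile = {"threads": 8}  # assumption: shared dbt execution settings
config = {"host": "localhost", "port": 8123, "database": "default", "username": "default"}
profile = {"normalize": {"target": "prod", "outputs": {"prod": {**base_profile, **transform_clickhouse(config)}}}}
print(yaml.dump(profile, default_flow_style=False))
```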
@@ -263,6 +265,20 @@ def transform_mssql(config: Dict[str, Any]): } return dbt_config + @staticmethod + def transform_clickhouse(config: Dict[str, Any]): + print("transform_clickhouse") + # https://docs.getdbt.com/reference/warehouse-profiles/clickhouse-profile + dbt_config = { + "type": "clickhouse", + "host": config["host"], + "port": config["port"], + "schema": config["database"], + "user": config["username"], + "password": config["password"], + } + return dbt_config + @staticmethod def read_json_config(input_path: str): with open(input_path, "r") as file: diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_clickhouse_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_clickhouse_names.json new file mode 100644 index 0000000000000..84f4fa7a50eb6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_clickhouse_names.json @@ -0,0 +1,32 @@ +{ + "_airbyte_another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__destinations_are_fine", + "schema": "_airbyte_another", + "table": "postgres_has_a_64_ch__destinations_are_fine" + }, + "_airbyte_schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__inations_are_fine_d2b", + "schema": "_airbyte_schema_test", + "table": "postgres_has_a_64_ch__inations_are_fine_d2b" + }, + "_airbyte_schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__inations_are_fine_e5a", + "schema": "_airbyte_schema_test", + "table": "postgres_has_a_64_ch__inations_are_fine_e5a" + }, + "another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__destinations_are_fine", + "schema": "another", + "table": "postgres_has_a_64_ch__destinations_are_fine" + }, + "schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__inations_are_fine_d2b", + "schema": "schema_test", + "table": "postgres_has_a_64_ch__inations_are_fine_d2b" + }, + "schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__inations_are_fine_e5a", + "schema": "schema_test", + "table": "postgres_has_a_64_ch__inations_are_fine_e5a" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_clickhouse_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_clickhouse_names.json new file mode 100644 index 0000000000000..450b8a7f4bfc8 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_clickhouse_names.json @@ -0,0 +1,252 @@ +{ + "_airbyte_schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "_airbyte_schema_test", + "table": "adcreatives" + }, + "_airbyte_schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "_airbyte_schema_test", + "table": "adcreatives_adlabels" + }, + "_airbyte_schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "_airbyte_schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_s__nk_data_app_link_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__nk_data_app_link_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_s__app_link_spec_android", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__app_link_spec_android" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_s__ata_app_link_spec_ios", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__ata_app_link_spec_ios" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_s__ta_app_link_spec_ipad", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__ta_app_link_spec_ipad" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": 
"adcreatives_object_s___app_link_spec_iphone", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s___app_link_spec_iphone" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_s__link_data_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__link_data_image_crops" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_s__a_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_s__ta_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__ta_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_s__a_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_s__a_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_s__a_image_crops_400x500", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_s__a_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": "adcreatives_object_s__ta_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__ta_image_crops_90x160" + }, + "schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "schema_test", + "table": "adcreatives" + }, + "schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "schema_test", + "table": "adcreatives_adlabels" + }, + "schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "schema_test", + "table": "adcreatives_image_crops" + }, + "schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "schema_test.adcreatives_image_crops_600x360.600x360": { + "file": 
"adcreatives_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "schema_test", + "table": "adcreatives_object_story_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_s__nk_data_app_link_spec", + "schema": "schema_test", + "table": "adcreatives_object_s__nk_data_app_link_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_s__app_link_spec_android", + "schema": "schema_test", + "table": "adcreatives_object_s__app_link_spec_android" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_s__ata_app_link_spec_ios", + "schema": "schema_test", + "table": "adcreatives_object_s__ata_app_link_spec_ios" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_s__ta_app_link_spec_ipad", + "schema": "schema_test", + "table": "adcreatives_object_s__ta_app_link_spec_ipad" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_s___app_link_spec_iphone", + "schema": "schema_test", + "table": "adcreatives_object_s___app_link_spec_iphone" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_s__link_data_image_crops", + "schema": "schema_test", + "table": "adcreatives_object_s__link_data_image_crops" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_s__a_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_object_s__a_image_crops_100x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_s__ta_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_object_s__ta_image_crops_100x72" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_s__a_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_object_s__a_image_crops_191x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_s__a_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_object_s__a_image_crops_400x150" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_s__a_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_object_s__a_image_crops_400x500" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_s__a_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_object_s__a_image_crops_600x360" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + 
"file": "adcreatives_object_s__ta_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_object_s__ta_image_crops_90x160" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_clickhouse_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_clickhouse_names.json new file mode 100644 index 0000000000000..047c8cb29a298 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_clickhouse_names.json @@ -0,0 +1,52 @@ +{ + "_airbyte_namespace.simple stream name.simple stream name": { + "file": "simple_stream_name_f35", + "schema": "_airbyte_namespace", + "table": "simple_stream_name_f35" + }, + "_airbyte_namespace.simple.simple": { + "file": "simple", + "schema": "_airbyte_namespace", + "table": "simple" + }, + "_airbyte_namespace.simple_stream_name.stream_name": { + "file": "_airbyte_namespace_simple_b94_stream_name", + "schema": "_airbyte_namespace", + "table": "simple_b94_stream_name" + }, + "_airbyte_other_namespace.simple_b94_stream_name.simple_b94_stream_name": { + "file": "_airbyte_other_names__e_b94_stream_name_f9d", + "schema": "_airbyte_other_namespace", + "table": "simple_b94_stream_name" + }, + "_airbyte_yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { + "file": "_airbyte_yet_another__e_b94_stream_name_bae", + "schema": "_airbyte_yet_another_namespace_with_a_very_long_name", + "table": "simple_b94_stream_name" + }, + "namespace.simple stream name.simple stream name": { + "file": "simple_stream_name_f35", + "schema": "namespace", + "table": "simple_stream_name_f35" + }, + "namespace.simple.simple": { + "file": "simple", + "schema": "namespace", + "table": "simple" + }, + "namespace.simple_stream_name.stream_name": { + "file": "namespace_simple_b94_stream_name", + "schema": "namespace", + "table": "simple_b94_stream_name" + }, + "other_namespace.simple_b94_stream_name.simple_b94_stream_name": { + "file": "other_namespace_simple_b94_stream_name", + "schema": "other_namespace", + "table": "simple_b94_stream_name" + }, + "yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { + "file": "yet_another_namespac__e_b94_stream_name_5d1", + "schema": "yet_another_namespace_with_a_very_long_name", + "table": "simple_b94_stream_name" + } +} diff --git a/airbyte-integrations/builds.md b/airbyte-integrations/builds.md index e243443886c98..a5878e50a8426 100644 --- a/airbyte-integrations/builds.md +++ b/airbyte-integrations/builds.md @@ -108,6 +108,7 @@ | :--- | :--- | | Azure Blob Storage | [![destination-azure-blob-storage](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-azure-blob-storage%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-azure-blob-storage) | | BigQuery | [![destination-bigquery](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-bigquery%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-bigquery) | +| ClickHouse | [![destination-clickhouse](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-clickhouse%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-clickhouse) | | Cassandra | 
[![destination-cassandra](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-cassandra%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-cassandra) | | Databricks | (Temporarily Not Available) | | Elasticsearch | (Temporarily Not Available) | diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/.dockerignore b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/.dockerignore new file mode 100644 index 0000000000000..65c7d0ad3e73c --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/.dockerignore @@ -0,0 +1,3 @@ +* +!Dockerfile +!build diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/Dockerfile b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/Dockerfile new file mode 100644 index 0000000000000..3e11bb8fc9100 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/Dockerfile @@ -0,0 +1,11 @@ +FROM airbyte/integration-base-java:dev + +WORKDIR /airbyte +ENV APPLICATION destination-clickhouse-strict-encrypt + +COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar + +RUN tar xf ${APPLICATION}.tar --strip-components=1 + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/destination-clickhouse-strict-encrypt diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/README.md b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/README.md new file mode 100644 index 0000000000000..aa674ff7cc030 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/README.md @@ -0,0 +1,5 @@ +# ClickHouse Strict Encrypt Test Configuration + +In order to test the ClickHouse destination, you need an up-and-running ClickHouse database with SSL enabled. + +This connector inherits from the ClickHouse destination, but supports SSL connections only.
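Contributors testing this variant need to supply their own `secrets/config.json`. A minimal sketch of what such a file might contain, written out via Python (every value is a placeholder; the field names follow the connector spec, and `ssl` is omitted because the strict-encrypt spec strips that property and always encrypts):

```python
# Sketch: write a hypothetical secrets/config.json for local testing.
# All values below are placeholders; substitute your own instance's details.
import json
import pathlib

config = {
    "host": "localhost",
    "port": 8443,            # placeholder HTTPS/JDBC port of an SSL-enabled server
    "database": "default",
    "username": "default",
    "password": "changeme",  # placeholder credential
}

pathlib.Path("secrets").mkdir(exist_ok=True)
pathlib.Path("secrets/config.json").write_text(json.dumps(config, indent=2))
```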
diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/build.gradle b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/build.gradle new file mode 100644 index 0000000000000..113db2568f599 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/build.gradle @@ -0,0 +1,30 @@ +plugins { + id 'application' + id 'airbyte-docker' + id 'airbyte-integration-test-java' +} + +application { + mainClass = 'io.airbyte.integrations.destination.clickhouse.ClickhouseDestination' + applicationDefaultJvmArgs = ['-XX:MaxRAMPercentage=75.0'] +} + +dependencies { + implementation project(':airbyte-db:lib') + implementation project(':airbyte-integrations:bases:base-java') + implementation project(':airbyte-protocol:models') + implementation project(':airbyte-integrations:connectors:destination-jdbc') + implementation project(':airbyte-integrations:connectors:destination-clickhouse') + implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) + + // https://mvnrepository.com/artifact/ru.yandex.clickhouse/clickhouse-jdbc + implementation 'ru.yandex.clickhouse:clickhouse-jdbc:0.3.1-patch' + + // https://mvnrepository.com/artifact/org.testcontainers/clickhouse + testImplementation 'org.testcontainers:clickhouse:1.16.2' + + integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-destination-test') + integrationTestJavaImplementation project(':airbyte-integrations:connectors:destination-clickhouse') + // https://mvnrepository.com/artifact/org.testcontainers/clickhouse + integrationTestJavaImplementation "org.testcontainers:clickhouse:1.16.2" +} diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncrypt.java b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncrypt.java new file mode 100644 index 0000000000000..ddbf1b7aa24f7 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncrypt.java @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.clickhouse; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.base.Destination; +import io.airbyte.integrations.base.IntegrationRunner; +import io.airbyte.integrations.base.spec_modification.SpecModifyingDestination; +import io.airbyte.protocol.models.ConnectorSpecification; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ClickhouseDestinationStrictEncrypt extends SpecModifyingDestination implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(ClickhouseDestinationStrictEncrypt.class); + + public ClickhouseDestinationStrictEncrypt() { + super(ClickhouseDestination.sshWrappedDestination()); + } + + @Override + public ConnectorSpecification modifySpec(final ConnectorSpecification originalSpec) { + final ConnectorSpecification spec = Jsons.clone(originalSpec); + ((ObjectNode) spec.getConnectionSpecification().get("properties")).remove("ssl"); + return spec; + } + + public static void main(final String[] args) throws Exception { + final Destination destination = new ClickhouseDestinationStrictEncrypt(); + LOGGER.info("starting destination: {}", ClickhouseDestinationStrictEncrypt.class); + new IntegrationRunner(destination).run(args); + LOGGER.info("completed destination: {}", ClickhouseDestinationStrictEncrypt.class); + } + +} diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptAcceptanceTest.java new file mode 100644 index 0000000000000..f59bfdcaaf5bf --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptAcceptanceTest.java @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.clickhouse; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcDatabase; +import io.airbyte.integrations.base.JavaBaseConstants; +import io.airbyte.integrations.destination.ExtendedNameTransformer; +import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import org.junit.jupiter.api.Disabled; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.BindMode; +import org.testcontainers.containers.ClickHouseContainer; + +public class ClickhouseDestinationStrictEncryptAcceptanceTest extends DestinationAcceptanceTest { + + private static final Logger LOGGER = LoggerFactory.getLogger(ClickhouseDestinationStrictEncryptAcceptanceTest.class); + + private static final String DB_NAME = "default"; + + private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); + + private ClickHouseContainer db; + + public static final Integer HTTP_PORT = 8123; + public static final Integer NATIVE_PORT = 9000; + public static final Integer HTTPS_PORT = 8443; + public static final Integer NATIVE_SECURE_PORT = 9440; + + @Override + protected String getImageName() { + return "airbyte/destination-clickhouse-strict-encrypt:dev"; + } + + @Override + protected boolean supportsNormalization() { + return true; + } + + @Override + protected boolean supportsDBT() { + return false; + } + + @Override + protected boolean implementsNamespaces() { + return true; + } + + @Override + protected String getDefaultSchema(final JsonNode config) { + if (config.get("database") == null) { + return null; + } + return config.get("database").asText(); + } + + @Override + protected JsonNode getConfig() { + // Note: ClickHouse official JDBC driver uses HTTP protocol, its default port is 8123 + // dbt clickhouse adapter uses native protocol, its default port is 9000 + // Since we disabled normalization and dbt test, we only use the JDBC port here. 
+ return Jsons.jsonNode(ImmutableMap.builder() + .put("host", db.getHost()) + .put("port", db.getMappedPort(HTTPS_PORT)) + .put("database", DB_NAME) + .put("username", db.getUsername()) + .put("password", db.getPassword()) + .put("schema", DB_NAME) + .build()); + } + + @Override + protected JsonNode getFailCheckConfig() { + final JsonNode clone = Jsons.clone(getConfig()); + ((ObjectNode) clone).put("password", "wrong password").put("ssl", false); + return clone; + } + + @Override + protected List retrieveNormalizedRecords(final TestDestinationEnv testEnv, + final String streamName, + final String namespace) + throws Exception { + return retrieveRecordsFromTable(namingResolver.getIdentifier(streamName), namespace); + } + + @Override + protected List retrieveRecords(TestDestinationEnv testEnv, + String streamName, + String namespace, + JsonNode streamSchema) + throws Exception { + return retrieveRecordsFromTable(namingResolver.getRawTableName(streamName), namespace) + .stream() + .map(r -> Jsons.deserialize(r.get(JavaBaseConstants.COLUMN_NAME_DATA).asText())) + .collect(Collectors.toList()); + } + + private List retrieveRecordsFromTable(final String tableName, final String schemaName) throws SQLException { + final JdbcDatabase jdbcDB = getDatabase(getConfig()); + return jdbcDB.query(String.format("SELECT * FROM %s.%s ORDER BY %s ASC", schemaName, tableName, + JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) + .collect(Collectors.toList()); + } + + @Override + protected List resolveIdentifier(final String identifier) { + final List result = new ArrayList<>(); + final String resolved = namingResolver.getIdentifier(identifier); + result.add(identifier); + result.add(resolved); + if (!resolved.startsWith("\"")) { + result.add(resolved.toLowerCase()); + result.add(resolved.toUpperCase()); + } + return result; + } + + private static JdbcDatabase getDatabase(final JsonNode config) { + final String jdbcStr = String.format("jdbc:clickhouse://%s:%s/%s?ssl=true&sslmode=none", + config.get("host").asText(), + config.get("port").asText(), + config.get("database").asText()); + return Databases.createJdbcDatabase( + config.get("username").asText(), + config.has("password") ? config.get("password").asText() : null, + jdbcStr, + ClickhouseDestination.DRIVER_CLASS); + } + + @Override + protected void setup(TestDestinationEnv testEnv) { + db = (ClickHouseContainer) new ClickHouseContainer("yandex/clickhouse-server") + .withExposedPorts(HTTP_PORT, NATIVE_PORT, HTTPS_PORT, NATIVE_SECURE_PORT) + .withClasspathResourceMapping("config.xml", "/etc/clickhouse-server/config.xml", BindMode.READ_ONLY) + .withClasspathResourceMapping("server.crt", "/etc/clickhouse-server/server.crt", BindMode.READ_ONLY) + .withClasspathResourceMapping("server.key", "/etc/clickhouse-server/server.key", BindMode.READ_ONLY) + .withClasspathResourceMapping("dhparam.pem", "/etc/clickhouse-server/dhparam.pem", BindMode.READ_ONLY); + db.start(); + + LOGGER.info(String.format("Clickhouse server container port mapping: %d -> %d, %d -> %d", + HTTP_PORT, db.getMappedPort(HTTP_PORT), + HTTPS_PORT, db.getMappedPort(HTTPS_PORT))); + } + + @Override + protected void tearDown(TestDestinationEnv testEnv) { + db.stop(); + db.close(); + } + + /** + * The SQL script generated by old version of dbt in 'test' step isn't compatible with ClickHouse, + * so we skip this test for now. 
+ * + * Ref: https://github.com/dbt-labs/dbt-core/issues/3905 + * + * @throws Exception + */ + @Disabled + public void testCustomDbtTransformations() throws Exception { + super.testCustomDbtTransformations(); + } + + @Disabled + public void testCustomDbtTransformationsFailure() throws Exception {} + + /** + * The normalization container needs the native port, while the destination container needs the HTTP port; we + * can't inject a port switch into the DestinationAcceptanceTest.runSync() method for this + * test, so we skip it. + * + * @throws Exception + */ + @Disabled + public void testIncrementalDedupeSync() throws Exception { + super.testIncrementalDedupeSync(); + } + + /** + * The normalization container needs the native port, while the destination container needs the HTTP port; we + * can't inject a port switch into the DestinationAcceptanceTest.runSync() method for this + * test, so we skip it. + * + * @throws Exception + */ + @Disabled + public void testSyncWithNormalization(final String messagesFilename, final String catalogFilename) throws Exception { + super.testSyncWithNormalization(messagesFilename, catalogFilename); + } + +} diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/config.xml b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/config.xml new file mode 100644 index 0000000000000..9b7432e4f240b --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/config.xml @@ -0,0 +1,1161 @@
+<!-- ClickHouse server test configuration: trace-level logging to /var/log/clickhouse-server/ with 1000M rotation; listener ports 8123 (HTTP), 9000 (native), 9004 (MySQL), 9005 (PostgreSQL), 8443 (HTTPS), 9440 (native TLS), 9011, and 9009 (interserver); an openSSL server section pointing at /etc/clickhouse-server/server.crt, server.key, and dhparam.pem with verificationMode set to none; users.xml-based access control with a default profile and user; commented jdbc-bridge installation notes (see https://github.com/ClickHouse/clickhouse-jdbc-bridge#usage); sample remote_servers cluster definitions on ports 9000/9440; system log tables (query_log, trace_log, query_thread_log, metric_log, asynchronous_metric_log, opentelemetry_span_log, crash_log) partitioned by toYYYYMM(event_date) and flushed roughly every 7500 ms; *_dictionary.xml includes; a /clickhouse/task_queue/ddl distributed DDL queue; graphite rollup rules (click_cost, max); the /var/lib/clickhouse/format_schemas/ path; a query-masking rule hiding encrypt/decrypt arguments; and crash reporting to Sentry left disabled. -->
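The `dhparam.pem`, `server.crt`, and `server.key` fixtures that follow are self-signed test artifacts (CN=localhost, valid for one year per the certificate dates). Equivalent files could be regenerated with stock OpenSSL; a sketch, assuming the `openssl` CLI is on PATH (the key size and validity period are illustrative, not requirements of this test setup):

```python
# Sketch: regenerate self-signed TLS test fixtures like the ones below.
# Assumes the standard `openssl` CLI; parameters are illustrative.
import subprocess

# Self-signed certificate and private key for CN=localhost.
subprocess.run([
    "openssl", "req", "-x509", "-newkey", "rsa:2048", "-nodes",
    "-keyout", "server.key", "-out", "server.crt",
    "-days", "365", "-subj", "/CN=localhost",
], check=True)

# Diffie-Hellman parameters referenced by config.xml (generation can take minutes).
subprocess.run(["openssl", "dhparam", "-out", "dhparam.pem", "4096"], check=True)
```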
diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/dhparam.pem b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/dhparam.pem new file mode 100644 index 0000000000000..2a862dd18e823 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/dhparam.pem @@ -0,0 +1,13 @@ +-----BEGIN DH PARAMETERS----- +MIICCAKCAgEArsn0Y1ZPxaUNw4QREGoCFSWUhg05CVgDIlhQ42ixrIRPW+WduZwO +KqnCohYkMQvFM0J0s4laNXhWXjrEYtnzwp81M7t/3gmXxkglyE4gk9BhpmLpv/JU +TLVhjWLggOZTsEWyiR4REUvJ1IvaQ/K9RGoOSw4tgyW8gHGLjPHxhTPtp5ZfE5TJ +OQZsByLcQbqji3jCoZeNRGcOjMPri4A0u8cXlHQr3/t49G/nE2oC296MVAZNkdjt +mDQmNX8Ej5dm6F0ZWYFptgxkJknaBDjsJh+ga8SKG6dZvqEMvTqHLTOV8h2uFei9 +Gm4DtKWf8x232s9t+aGOF+qsPss1lM8spYTnY9B6jdEzEwEiXFBogG763lJNJpLv +nsk8YofO6hJrgiKJWiSbR50qo6us1cq191mFDBWO8yWSjfXgf5HpzutO1hVJPKyS ++3VTt3ZfRFBJZozWlQzddurwd5Wr+D3JR8E9mz7YukEA7iLwM3nZLcF1b35cYCRs +9Q59ezxyveVtWAcTBBzRzNGr3mf9LYoumd5o3jFJsLLWAHfDZXey7n6tBJUh6a6N +ChOCeoavhzlv3lx2+C19ZKkTKU+Z/cCjnW9530MVFtlXFqihIaVAB/ecAF0ZsPNB +pg5I+U4TR0J684eoM4LgMo0ydq7G9g+WbgQ+aD0C7CcEAV7t8iuj9IMCAQI= +-----END DH PARAMETERS----- diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/server.crt b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/server.crt new file mode 100644 index 0000000000000..0aeb3cdc4027e --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/server.crt @@ -0,0 +1,19 @@ +-----BEGIN CERTIFICATE----- +MIIDCTCCAfGgAwIBAgIUcuYIRxO5jbEzvookG0MTrb2jqDUwDQYJKoZIhvcNAQEL +BQAwFDESMBAGA1UEAwwJbG9jYWxob3N0MB4XDTIxMTEwNzAzNDI0MloXDTIyMTEw +NzAzNDI0MlowFDESMBAGA1UEAwwJbG9jYWxob3N0MIIBIjANBgkqhkiG9w0BAQEF +AAOCAQ8AMIIBCgKCAQEAmKDFGjs1/TpNv7ZOB9LTeSYOIjo50An4ahYO+xTGVQJS +SajsNs2NkZWXRUe+WOGANFjTWxTGs/vYWlJ/gC/0GckcuSyqWAShVfZuYHQsIFAG +yFYTAcXQ9bJPPiNTOwq00WqnhgQZ121EGiIYnxZ8XqR1CPS4fXMDFr5lSSrlu/yM +vzRQOEbiiKcrRZvikiFijFDt759J4r3Pr4+tmZ2GJe28ZI9sMmeLigup1/awna9i +7j1yS5iHwFd0xVXSVkPdym3crNDPG9Iy2QicDqijJpqDzKErwR71839x0E8B53HF +PL6U6KtACViDklPYn+IWsAuRV9o56/X3X6tmt8YjBQIDAQABo1MwUTAdBgNVHQ4E +FgQUNoWDQBHyh0EUzpvEtbktJwvPol8wHwYDVR0jBBgwFoAUNoWDQBHyh0EUzpvE +tbktJwvPol8wDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAJZLh +BdmGt85Ru8ACtrJ8P/59GjtLBXCDAyrQeR+SfPShoL3xx69ldh9flRWOgODxHQiX +tGMDPbXZ5dRUAPmwOEjDecYk5H6RXKWa1BqX5yAa1grfkZsj1+9Pv7DneQae/owq +RAVd0SWjomFL0oElUvHhk7AMpHu7XK9f3Vj2JPFcieD7SAotn6i3IFKFrBTz14T4 +6O0tgEUdcujr0DsIxZsJRt2ITXgdJCOT+ohDWTdqB5+vh+HxfaSxAPssLBwD1bFF +m3mEBNCkrFCpX/4+uYw559ah9KRAN8nVpzl1fKZLIms/6UCdA65FEGr+7m39UDa2 +leTb3J/1adaimGBHkw== +-----END CERTIFICATE----- diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/server.key b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/server.key new file mode 100644 index 0000000000000..6d29bcb797faf --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/server.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCYoMUaOzX9Ok2/ +tk4H0tN5Jg4iOjnQCfhqFg77FMZVAlJJqOw2zY2RlZdFR75Y4YA0WNNbFMaz+9ha +Un+AL/QZyRy5LKpYBKFV9m5gdCwgUAbIVhMBxdD1sk8+I1M7CrTRaqeGBBnXbUQa +IhifFnxepHUI9Lh9cwMWvmVJKuW7/Iy/NFA4RuKIpytFm+KSIWKMUO3vn0nivc+v 
+j62ZnYYl7bxkj2wyZ4uKC6nX9rCdr2LuPXJLmIfAV3TFVdJWQ93Kbdys0M8b0jLZ +CJwOqKMmmoPMoSvBHvXzf3HQTwHnccU8vpToq0AJWIOSU9if4hawC5FX2jnr9fdf +q2a3xiMFAgMBAAECggEABRh4/CM17jpgFiJ0AHbdcJIArsva2kM68+/AmEldRcFK +FgN5v2qla1LfNgTrKfR9X0IH7GaJK8EgvAM9Fn6DW8kHGHeRSFb/O+t6S+YVAw8o +6s9U2x5Ll6hYVO7A/Jui5cr1xNemNTcEqwqwX79Ub6R189KXxjhsxfmdUBAKk3gR +1yOsVhsN0B2FnVCF8V1Ad3G8AUtZ3Akrclex8kRSMIKYCDhhn8p1kNWQT7MSYXnD +YGJdO/a3ZdXUJvUG7W3J/lX6rS9A4dDA8S9AgQ06XroODdm+NrP2qFfseOeyZpYs +m0mRfSNasWXStU+IyjK/wuYLGS7YgeLASHReUMUWgQKBgQDKItQqpF4SxwfJCyET +W2HYcU6ho5B37EPcF7gBPFwg+/8Mr0vW1cr3HgCycZpud1T4v0Hcd0ZyPkEFZ9J1 +HAzmWyzMXyvBHrb5uUeOO3SHqOBKhhXCj+EVJUmgTjKmMMUSj2NGB5HycTYghCo8 +faGRe7knOXNBHbKayb5p8b5kSQKBgQDBTKMiF/V1D2O+j9/8Y8Voy6iBS6pMLzgF +5wk/jplTwBIxBE8RXRHkEOIdoO74I/8NvB4HSGRpN4ftoqPneGA4AUaPsvn9e76z ++7HhVCvAPCgdNMi2XIL76f0YFJPIBisnggun87hyAseUw/R6s9szlA1iQM6r94jX +S+eD5P0Q3QKBgFf9MT4erpSd7tWk8pDqhn0hVYknGgwZ3LBB4ucmzzPMfgmXhPvP +tLo9ZTBII2FSsLpXBuRhR1kToFoqB4LS4wqjRazxAKrgI+YwsOVJHECxUnEeTPqU +hvYddR9C6ulM8XxSznP9d5qBjX46CeirB1m88awRZgpCUyzuBOU5RheBAoGAWrbA +iCt+QeVrBe83Spb3+eo5thPwY7h1Li/yoyUkx60H3IENKjTnRIS32PfBrioWdDeo +T/qlRMuOuvLswKA5Z48RsjZoI5GDOawRGpIJxjl1Cd/PoeVggyCYakid4e0jK3NY +TQWPtdGgICyl+z+Uy2vbrBSF6SZNzdwNVlSMfvECgYAoPb1ihbS0+mSKvtBaw9dV +fWP0ktVEEIw5BLEeD+75goYKTTNtJ7hV47aAnEcpSOlO77I3G8E5v9YL/jlXMfvF +3wi2dfGgDbKuXCtCQS/BqnqDhw9Py3sg9XzgFdLM7Y0Z7JJHTDyV9b/PfAMk6bgn +SKOWqhX1fJgG5kgymk1YEg== +-----END PRIVATE KEY----- diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptTest.java b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptTest.java new file mode 100644 index 0000000000000..ff1932d790567 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptTest.java @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.clickhouse; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.resources.MoreResources; +import io.airbyte.protocol.models.ConnectorSpecification; +import org.junit.jupiter.api.Test; + +class ClickhouseDestinationStrictEncryptTest { + + @Test + void testGetSpec() throws Exception { + System.out.println(new ClickhouseDestinationStrictEncrypt().spec().getConnectionSpecification()); + assertEquals(Jsons.deserialize(MoreResources.readResource("expected_spec.json"), ConnectorSpecification.class), + new ClickhouseDestinationStrictEncrypt().spec()); + } + +} diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test/resources/expected_spec.json b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test/resources/expected_spec.json new file mode 100644 index 0000000000000..3d15378c40808 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test/resources/expected_spec.json @@ -0,0 +1,165 @@ +{ + "documentationUrl": "https://docs.airbyte.io/integrations/destinations/clickhouse", + "supportsIncremental": true, + "supportsNormalization": true, + "supportsDBT": false, + "supported_destination_sync_modes": ["overwrite", "append", "append_dedup"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "ClickHouse Destination Spec", + "type": "object", + "required": ["host", "port", "database", "username"], + "additionalProperties": true, + "properties": { + "host": { + "title": "Host", + "description": "Hostname of the database.", + "type": "string", + "order": 0 + }, + "port": { + "title": "Port", + "description": "JDBC port (not the native port) of the database.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 8123, + "examples": ["8123"], + "order": 1 + }, + "database": { + "title": "DB Name", + "description": "Name of the database.", + "type": "string", + "order": 2 + }, + "username": { + "title": "User", + "description": "Username to use to access the database.", + "type": "string", + "order": 3 + }, + "password": { + "title": "Password", + "description": "Password associated with the username.", + "type": "string", + "airbyte_secret": true, + "order": 4 + }, + "tunnel_method": { + "type": "object", + "title": "SSH Tunnel Method", + "description": "Whether to initiate an SSH tunnel before connecting to the database, and if so, which kind of authentication to use.", + "oneOf": [ + { + "title": "No Tunnel", + "required": ["tunnel_method"], + "properties": { + "tunnel_method": { + "description": "No ssh tunnel needed to connect to database", + "type": "string", + "const": "NO_TUNNEL", + "order": 0 + } + } + }, + { + "title": "SSH Key Authentication", + "required": [ + "tunnel_method", + "tunnel_host", + "tunnel_port", + "tunnel_user", + "ssh_key" + ], + "properties": { + "tunnel_method": { + "description": "Connect through a jump server tunnel host using username and ssh key", + "type": "string", + "const": "SSH_KEY_AUTH", + "order": 0 + }, + "tunnel_host": { + "title": "SSH Tunnel Jump Server Host", + "description": "Hostname of the jump server host that allows inbound ssh tunnel.", + "type": "string", + "order": 1 + }, + "tunnel_port": { + "title": "SSH Connection Port", + "description": "Port on the proxy/jump server that accepts inbound ssh connections.", + "type": "integer", + "minimum": 0, + "maximum": 
65536, + "default": 22, + "examples": ["22"], + "order": 2 + }, + "tunnel_user": { + "title": "SSH Login Username", + "description": "OS-level username for logging into the jump server host.", + "type": "string", + "order": 3 + }, + "ssh_key": { + "title": "SSH Private Key", + "description": "OS-level user account ssh key credentials in RSA PEM format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )", + "type": "string", + "airbyte_secret": true, + "multiline": true, + "order": 4 + } + } + }, + { + "title": "Password Authentication", + "required": [ + "tunnel_method", + "tunnel_host", + "tunnel_port", + "tunnel_user", + "tunnel_user_password" + ], + "properties": { + "tunnel_method": { + "description": "Connect through a jump server tunnel host using username and password authentication", + "type": "string", + "const": "SSH_PASSWORD_AUTH", + "order": 0 + }, + "tunnel_host": { + "title": "SSH Tunnel Jump Server Host", + "description": "Hostname of the jump server host that allows inbound ssh tunnel.", + "type": "string", + "order": 1 + }, + "tunnel_port": { + "title": "SSH Connection Port", + "description": "Port on the proxy/jump server that accepts inbound ssh connections.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 22, + "examples": ["22"], + "order": 2 + }, + "tunnel_user": { + "title": "SSH Login Username", + "description": "OS-level username for logging into the jump server host", + "type": "string", + "order": 3 + }, + "tunnel_user_password": { + "title": "Password", + "description": "OS-level password for logging into the jump server host", + "type": "string", + "airbyte_secret": true, + "order": 4 + } + } + } + ] + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-clickhouse/.dockerignore b/airbyte-integrations/connectors/destination-clickhouse/.dockerignore new file mode 100644 index 0000000000000..65c7d0ad3e73c --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/.dockerignore @@ -0,0 +1,3 @@ +* +!Dockerfile +!build diff --git a/airbyte-integrations/connectors/destination-clickhouse/Dockerfile b/airbyte-integrations/connectors/destination-clickhouse/Dockerfile new file mode 100644 index 0000000000000..1a062e9892b0f --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/Dockerfile @@ -0,0 +1,11 @@ +FROM airbyte/integration-base-java:dev + +WORKDIR /airbyte +ENV APPLICATION destination-clickhouse + +COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar + +RUN tar xf ${APPLICATION}.tar --strip-components=1 + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/destination-clickhouse diff --git a/airbyte-integrations/connectors/destination-clickhouse/README.md b/airbyte-integrations/connectors/destination-clickhouse/README.md new file mode 100644 index 0000000000000..38646535af5c8 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/README.md @@ -0,0 +1,68 @@ +# Destination Clickhouse + +This is the repository for the Clickhouse destination connector in Java. +For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/clickhouse). 
+
+## Local development
+
+#### Building via Gradle
+From the Airbyte repository root, run:
+```
+./gradlew :airbyte-integrations:connectors:destination-clickhouse:build
+```
+
+#### Create credentials
+**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`.
+Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information.
+
+**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials.
+
+### Locally running the connector docker image
+
+#### Build
+Build the connector image via Gradle:
+```
+./gradlew :airbyte-integrations:connectors:destination-clickhouse:airbyteDocker
+```
+When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in
+the Dockerfile.
+
+#### Run
+Then run any of the connector commands as follows:
+```
+docker run --rm airbyte/destination-clickhouse:dev spec
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-clickhouse:dev check --config /secrets/config.json
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-clickhouse:dev discover --config /secrets/config.json
+docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-clickhouse:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json
+```
+
+## Testing
+We use `JUnit` for Java tests.
+
+### Unit and Integration Tests
+Place unit tests under `src/test/java/io/airbyte/integrations/destination/clickhouse`.
+
+#### Acceptance Tests
+Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in
+`src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java`.
+
+### Using Gradle to run tests
+All commands should be run from the Airbyte project root.
+To run unit tests:
+```
+./gradlew :airbyte-integrations:connectors:destination-clickhouse:unitTest
+```
+To run acceptance and custom integration tests:
+```
+./gradlew :airbyte-integrations:connectors:destination-clickhouse:integrationTest
+```
+
+## Dependency Management
+
+### Publishing a new version of the connector
+You've checked out the repo, implemented a million-dollar feature, and you're ready to share your changes with the world. Now what?
+1. Make sure your changes are passing unit and integration tests.
+1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)).
+1. Create a Pull Request.
+1. Pat yourself on the back for being an awesome contributor.
+1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
diff --git a/airbyte-integrations/connectors/destination-clickhouse/bootstrap.md b/airbyte-integrations/connectors/destination-clickhouse/bootstrap.md
new file mode 100644
index 0000000000000..13d96b5951108
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-clickhouse/bootstrap.md
@@ -0,0 +1,22 @@
+# ClickHouse
+
+## Overview
+
+ClickHouse is a fast, open-source, column-oriented database management system that can generate analytical data reports in real time using SQL queries.
+
+## Endpoints
+
+This destination connector uses the official ClickHouse JDBC driver, which communicates over HTTP: [https://github.com/ClickHouse/clickhouse-jdbc](https://github.com/ClickHouse/clickhouse-jdbc)
+
+## Quick Notes
+
+- The ClickHouse JDBC driver uses the HTTP protocol (default port 8123), but the [dbt clickhouse adapter](https://github.com/silentsokolov/dbt-clickhouse) uses the native TCP protocol (default port 9000).
+
+- This connector doesn't support nested streams or schema changes yet.
+
+- The community [dbt clickhouse adapter](https://github.com/silentsokolov/dbt-clickhouse) has some bugs that haven't been fixed yet (for example, [https://github.com/silentsokolov/dbt-clickhouse/issues/20](https://github.com/silentsokolov/dbt-clickhouse/issues/20)), so the dbt test is based on a fork: [https://github.com/burmecia/dbt-clickhouse](https://github.com/burmecia/dbt-clickhouse).
+
+## API Reference
+
+The ClickHouse reference documentation: [https://clickhouse.com/docs/en/](https://clickhouse.com/docs/en/)
+
diff --git a/airbyte-integrations/connectors/destination-clickhouse/build.gradle b/airbyte-integrations/connectors/destination-clickhouse/build.gradle
new file mode 100644
index 0000000000000..43d63d7c606e9
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-clickhouse/build.gradle
@@ -0,0 +1,31 @@
+plugins {
+    id 'application'
+    id 'airbyte-docker'
+    id 'airbyte-integration-test-java'
+}
+
+application {
+    mainClass = 'io.airbyte.integrations.destination.clickhouse.ClickhouseDestination'
+    applicationDefaultJvmArgs = ['-XX:MaxRAMPercentage=75.0']
+}
+
+dependencies {
+    implementation project(':airbyte-db:lib')
+    implementation project(':airbyte-config:models')
+    implementation project(':airbyte-protocol:models')
+    implementation project(':airbyte-integrations:bases:base-java')
+    implementation project(':airbyte-integrations:connectors:destination-jdbc')
+    implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs)
+
+    // https://mvnrepository.com/artifact/ru.yandex.clickhouse/clickhouse-jdbc
+    implementation 'ru.yandex.clickhouse:clickhouse-jdbc:0.3.1-patch'
+
+    // https://mvnrepository.com/artifact/org.testcontainers/clickhouse
+    testImplementation 'org.testcontainers:clickhouse:1.16.2'
+
+    integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-destination-test')
+    integrationTestJavaImplementation project(':airbyte-integrations:connectors:destination-clickhouse')
+    integrationTestJavaImplementation project(':airbyte-workers')
+    // https://mvnrepository.com/artifact/org.testcontainers/clickhouse
+    integrationTestJavaImplementation "org.testcontainers:clickhouse:1.16.2"
+}
diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestination.java b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestination.java
new file mode 100644
index 0000000000000..95c0b767b1436
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestination.java
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2021 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.clickhouse;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.collect.ImmutableMap;
+import io.airbyte.commons.json.Jsons;
+import io.airbyte.db.jdbc.JdbcDatabase;
+import io.airbyte.integrations.base.Destination;
+import io.airbyte.integrations.base.IntegrationRunner;
+import io.airbyte.integrations.base.ssh.SshWrappedDestination;
+import io.airbyte.integrations.destination.NamingConventionTransformer;
+import io.airbyte.integrations.destination.jdbc.AbstractJdbcDestination;
+import io.airbyte.protocol.models.AirbyteConnectionStatus;
+import io.airbyte.protocol.models.AirbyteConnectionStatus.Status;
+import java.util.ArrayList;
+import java.util.List;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class ClickhouseDestination extends AbstractJdbcDestination implements Destination {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(ClickhouseDestination.class);
+
+  public static final String DRIVER_CLASS = "ru.yandex.clickhouse.ClickHouseDriver";
+
+  public static final List<String> HOST_KEY = List.of("host");
+  public static final List<String> PORT_KEY = List.of("port");
+
+  private static final String PASSWORD = "password";
+
+  public static Destination sshWrappedDestination() {
+    return new SshWrappedDestination(new ClickhouseDestination(), HOST_KEY, PORT_KEY);
+  }
+
+  public ClickhouseDestination() {
+    super(DRIVER_CLASS, new ClickhouseSQLNameTransformer(), new ClickhouseSqlOperations());
+  }
+
+  @Override
+  public JsonNode toJdbcConfig(final JsonNode config) {
+    final List<String> additionalParameters = new ArrayList<>();
+
+    final StringBuilder jdbcUrl = new StringBuilder(String.format("jdbc:clickhouse://%s:%s/%s?",
+        config.get("host").asText(),
+        config.get("port").asText(),
+        config.get("database").asText()));
+
+    // SSL parameters are added unless the config explicitly sets "ssl" to false.
+    if (!config.has("ssl") || config.get("ssl").asBoolean()) {
+      additionalParameters.add("ssl=true");
+      additionalParameters.add("sslmode=none");
+    }
+
+    if (!additionalParameters.isEmpty()) {
+      additionalParameters.forEach(x -> jdbcUrl.append(x).append("&"));
+    }
+
+    final ImmutableMap.Builder<Object, Object> configBuilder = ImmutableMap.builder()
+        .put("username", config.get("username").asText())
+        .put("jdbc_url", jdbcUrl.toString());
+
+    if (config.has(PASSWORD)) {
+      configBuilder.put(PASSWORD, config.get(PASSWORD).asText());
+    }
+
+    return Jsons.jsonNode(configBuilder.build());
+  }
+
+  @Override
+  public AirbyteConnectionStatus check(final JsonNode config) {
+    try (final JdbcDatabase database = getDatabase(config)) {
+      final NamingConventionTransformer namingResolver = getNamingResolver();
+      final String outputSchema = namingResolver.getIdentifier(config.get("database").asText());
+      attemptSQLCreateAndDropTableOperations(outputSchema, database, namingResolver, getSqlOperations());
+      return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED);
+    } catch (final Exception e) {
+      LOGGER.error("Exception while checking connection: ", e);
+      return new AirbyteConnectionStatus()
+          .withStatus(Status.FAILED)
+          .withMessage("Could not connect with provided configuration. 
\n" + e.getMessage()); + } + } + + public static void main(String[] args) throws Exception { + final Destination destination = ClickhouseDestination.sshWrappedDestination(); + LOGGER.info("starting destination: {}", ClickhouseDestination.class); + new IntegrationRunner(destination).run(args); + LOGGER.info("completed destination: {}", ClickhouseDestination.class); + } + +} diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSQLNameTransformer.java b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSQLNameTransformer.java new file mode 100644 index 0000000000000..fca0dc91e4131 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSQLNameTransformer.java @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.clickhouse; + +import io.airbyte.integrations.destination.ExtendedNameTransformer; + +public class ClickhouseSQLNameTransformer extends ExtendedNameTransformer { + + @Override + protected String applyDefaultCase(final String input) { + return input.toLowerCase(); + } + +} diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java new file mode 100644 index 0000000000000..b712a8cf0a7a2 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */
+
+package io.airbyte.integrations.destination.clickhouse;
+
+import io.airbyte.db.jdbc.JdbcDatabase;
+import io.airbyte.integrations.base.JavaBaseConstants;
+import io.airbyte.integrations.destination.jdbc.JdbcSqlOperations;
+import io.airbyte.protocol.models.AirbyteRecordMessage;
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.sql.SQLException;
+import java.util.List;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import ru.yandex.clickhouse.ClickHouseConnection;
+import ru.yandex.clickhouse.ClickHouseStatement;
+import ru.yandex.clickhouse.domain.ClickHouseFormat;
+
+public class ClickhouseSqlOperations extends JdbcSqlOperations {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(ClickhouseSqlOperations.class);
+
+  @Override
+  public void createSchemaIfNotExists(final JdbcDatabase database, final String schemaName) throws Exception {
+    database.execute(String.format("CREATE DATABASE IF NOT EXISTS %s;\n", schemaName));
+  }
+
+  @Override
+  public String createTableQuery(final JdbcDatabase database, final String schemaName, final String tableName) {
+    return String.format(
+        "CREATE TABLE IF NOT EXISTS %s.%s ( \n"
+            + "%s String,\n"
+            + "%s String,\n"
+            + "%s DateTime64(3, 'GMT') DEFAULT now(),\n"
+            + "PRIMARY KEY(%s)\n"
+            + ")\n"
+            + "ENGINE = MergeTree;\n",
+        schemaName, tableName,
+        JavaBaseConstants.COLUMN_NAME_AB_ID,
+        JavaBaseConstants.COLUMN_NAME_DATA,
+        JavaBaseConstants.COLUMN_NAME_EMITTED_AT,
+        JavaBaseConstants.COLUMN_NAME_AB_ID);
+  }
+
+  @Override
+  public void executeTransaction(final JdbcDatabase database, final List<String> queries) throws Exception {
+    final StringBuilder appendedQueries = new StringBuilder();
+    for (final String query : queries) {
+      appendedQueries.append(query);
+    }
+    database.execute(appendedQueries.toString());
+  }
+
+  @Override
+  public void insertRecordsInternal(final JdbcDatabase database,
+                                    final List<AirbyteRecordMessage> records,
+                                    final String schemaName,
+                                    final String tmpTableName)
+      throws SQLException {
+    LOGGER.info("actual size of batch: {}", records.size());
+
+    if (records.isEmpty()) {
+      return;
+    }
+
+    database.execute(connection -> {
+      File tmpFile = null;
+      Exception primaryException = null;
+      try {
+        tmpFile = Files.createTempFile(tmpTableName + "-", ".tmp").toFile();
+        writeBatchToFile(tmpFile, records);
+
+        ClickHouseConnection conn = connection.unwrap(ClickHouseConnection.class);
+        ClickHouseStatement sth = conn.createStatement();
+        sth.write() // Write API entrypoint
+            .table(String.format("%s.%s", schemaName, tmpTableName)) // where to write data
+            .data(tmpFile, ClickHouseFormat.CSV) // specify input
+            .send();
+
+      } catch (final Exception e) {
+        primaryException = e;
+        throw new RuntimeException(e);
+      } finally {
+        try {
+          if (tmpFile != null) {
+            Files.delete(tmpFile.toPath());
+          }
+        } catch (final IOException e) {
+          if (primaryException != null)
+            e.addSuppressed(primaryException);
+          throw new RuntimeException(e);
+        }
+      }
+    });
+  }
+
+}
diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json
new file mode 100644
index 0000000000000..6037b573394d5
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json
@@ -0,0 +1,58 @@
+{
+  "documentationUrl": "https://docs.airbyte.io/integrations/destinations/clickhouse",
+  "supportsIncremental": true,
+  "supportsNormalization": true,
+  "supportsDBT": false,
"supported_destination_sync_modes": ["overwrite", "append", "append_dedup"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "ClickHouse Destination Spec", + "type": "object", + "required": ["host", "port", "database", "username"], + "additionalProperties": true, + "properties": { + "host": { + "title": "Host", + "description": "Hostname of the database.", + "type": "string", + "order": 0 + }, + "port": { + "title": "Port", + "description": "JDBC port (not the native port) of the database.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 8123, + "examples": ["8123"], + "order": 1 + }, + "database": { + "title": "DB Name", + "description": "Name of the database.", + "type": "string", + "order": 2 + }, + "username": { + "title": "User", + "description": "Username to use to access the database.", + "type": "string", + "order": 3 + }, + "password": { + "title": "Password", + "description": "Password associated with the username.", + "type": "string", + "airbyte_secret": true, + "order": 4 + }, + "ssl": { + "title": "SSL Connection", + "description": "Encrypt data using SSL.", + "type": "boolean", + "default": false, + "order": 5 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java new file mode 100644 index 0000000000000..cf25f5211d54c --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.clickhouse; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcDatabase; +import io.airbyte.integrations.base.JavaBaseConstants; +import io.airbyte.integrations.destination.ExtendedNameTransformer; +import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import org.junit.jupiter.api.Disabled; +import org.testcontainers.containers.ClickHouseContainer; + +public class ClickhouseDestinationAcceptanceTest extends DestinationAcceptanceTest { + + private static final String DB_NAME = "default"; + + private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); + + private ClickHouseContainer db; + + @Override + protected String getImageName() { + return "airbyte/destination-clickhouse:dev"; + } + + @Override + protected boolean supportsNormalization() { + return true; + } + + @Override + protected boolean supportsDBT() { + return false; + } + + @Override + protected boolean implementsNamespaces() { + return true; + } + + @Override + protected String getDefaultSchema(final JsonNode config) { + if (config.get("database") == null) { + return null; + } + return config.get("database").asText(); + } + + @Override + protected JsonNode getConfig() { + // Note: ClickHouse official JDBC driver uses HTTP protocol, its default port is 8123 + // dbt clickhouse adapter uses native protocol, its default port is 9000 + // Since we disabled normalization and dbt test, we only use the JDBC port here. 
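+    // getFirstMappedPort() returns the host mapping of the container's first exposed port,
+    // which for the ClickHouse testcontainer is the HTTP port (8123).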
+    return Jsons.jsonNode(ImmutableMap.builder()
+        .put("host", db.getHost())
+        .put("port", db.getFirstMappedPort())
+        .put("database", DB_NAME)
+        .put("username", db.getUsername())
+        .put("password", db.getPassword())
+        .put("schema", DB_NAME)
+        .put("ssl", false)
+        .build());
+  }
+
+  @Override
+  protected JsonNode getFailCheckConfig() {
+    final JsonNode clone = Jsons.clone(getConfig());
+    ((ObjectNode) clone).put("password", "wrong password");
+    return clone;
+  }
+
+  @Override
+  protected List<JsonNode> retrieveNormalizedRecords(final TestDestinationEnv testEnv,
+                                                     final String streamName,
+                                                     final String namespace)
+      throws Exception {
+    return retrieveRecordsFromTable(namingResolver.getIdentifier(streamName), namespace);
+  }
+
+  @Override
+  protected List<JsonNode> retrieveRecords(TestDestinationEnv testEnv,
+                                           String streamName,
+                                           String namespace,
+                                           JsonNode streamSchema)
+      throws Exception {
+    return retrieveRecordsFromTable(namingResolver.getRawTableName(streamName), namespace)
+        .stream()
+        .map(r -> Jsons.deserialize(r.get(JavaBaseConstants.COLUMN_NAME_DATA).asText()))
+        .collect(Collectors.toList());
+  }
+
+  private List<JsonNode> retrieveRecordsFromTable(final String tableName, final String schemaName) throws SQLException {
+    final JdbcDatabase jdbcDB = getDatabase(getConfig());
+    return jdbcDB.query(String.format("SELECT * FROM %s.%s ORDER BY %s ASC", schemaName, tableName,
+        JavaBaseConstants.COLUMN_NAME_EMITTED_AT))
+        .collect(Collectors.toList());
+  }
+
+  @Override
+  protected List<String> resolveIdentifier(final String identifier) {
+    final List<String> result = new ArrayList<>();
+    final String resolved = namingResolver.getIdentifier(identifier);
+    result.add(identifier);
+    result.add(resolved);
+    if (!resolved.startsWith("\"")) {
+      result.add(resolved.toLowerCase());
+      result.add(resolved.toUpperCase());
+    }
+    return result;
+  }
+
+  private static JdbcDatabase getDatabase(final JsonNode config) {
+    return Databases.createJdbcDatabase(
+        config.get("username").asText(),
+        config.has("password") ? config.get("password").asText() : null,
+        String.format("jdbc:clickhouse://%s:%s/%s",
+            config.get("host").asText(),
+            config.get("port").asText(),
+            config.get("database").asText()),
+        ClickhouseDestination.DRIVER_CLASS);
+  }
+
+  @Override
+  protected void setup(TestDestinationEnv testEnv) {
+    db = new ClickHouseContainer("yandex/clickhouse-server");
+    db.start();
+  }
+
+  @Override
+  protected void tearDown(TestDestinationEnv testEnv) {
+    db.stop();
+    db.close();
+  }
+
+  /**
+   * The SQL script generated by the old version of dbt in the 'test' step isn't compatible with
+   * ClickHouse, so we skip this test for now.
+   *
+   * Ref: https://github.com/dbt-labs/dbt-core/issues/3905
+   *
+   * @throws Exception
+   */
+  @Disabled
+  public void testCustomDbtTransformations() throws Exception {
+    super.testCustomDbtTransformations();
+  }
+
+  @Disabled
+  public void testCustomDbtTransformationsFailure() throws Exception {}
+
+  /**
+   * The normalization container needs the native port, while the destination container needs the
+   * HTTP port; we can't inject the port switch into the DestinationAcceptanceTest.runSync() method
+   * for this test, so we skip it.
+   *
+   * @throws Exception
+   */
+  @Disabled
+  public void testIncrementalDedupeSync() throws Exception {
+    super.testIncrementalDedupeSync();
+  }
+
+  /**
+   * The normalization container needs the native port, while the destination container needs the
+   * HTTP port; we can't inject the port switch into the DestinationAcceptanceTest.runSync() method
+   * for this test, so we skip it.
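+   * The filename arguments mirror the signature of the parameterized test being overridden.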
+ * + * @throws Exception + */ + @Disabled + public void testSyncWithNormalization(final String messagesFilename, final String catalogFilename) throws Exception { + super.testSyncWithNormalization(messagesFilename, catalogFilename); + } + +} diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshClickhouseDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshClickhouseDestinationAcceptanceTest.java new file mode 100644 index 0000000000000..ed50b11027f19 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshClickhouseDestinationAcceptanceTest.java @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.clickhouse; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.airbyte.commons.functional.CheckedFunction; +import io.airbyte.commons.json.Jsons; +import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcDatabase; +import io.airbyte.integrations.base.JavaBaseConstants; +import io.airbyte.integrations.base.ssh.SshBastionContainer; +import io.airbyte.integrations.base.ssh.SshTunnel; +import io.airbyte.integrations.destination.ExtendedNameTransformer; +import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import org.junit.jupiter.api.Disabled; +import org.testcontainers.containers.ClickHouseContainer; + +/** + * Abstract class that allows us to avoid duplicating testing logic for testing SSH with a key file + * or with a password. 
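+ * Concrete subclasses choose the tunnel method by implementing {@link #getTunnelMethod()}.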
+ */
+public abstract class SshClickhouseDestinationAcceptanceTest extends DestinationAcceptanceTest {
+
+  public abstract SshTunnel.TunnelMethod getTunnelMethod();
+
+  private static final String DB_NAME = "default";
+
+  private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer();
+
+  private ClickHouseContainer db;
+  private final SshBastionContainer bastion = new SshBastionContainer();
+
+  @Override
+  protected String getImageName() {
+    return "airbyte/destination-clickhouse:dev";
+  }
+
+  @Override
+  protected boolean supportsNormalization() {
+    return true;
+  }
+
+  @Override
+  protected boolean supportsDBT() {
+    return false;
+  }
+
+  @Override
+  protected boolean implementsNamespaces() {
+    return true;
+  }
+
+  @Override
+  protected String getDefaultSchema(final JsonNode config) {
+    if (config.get("database") == null) {
+      return null;
+    }
+    return config.get("database").asText();
+  }
+
+  @Override
+  protected JsonNode getConfig() throws Exception {
+    return bastion.getTunnelConfig(getTunnelMethod(), bastion.getBasicDbConfigBuider(db, DB_NAME)
+        .put("schema", DB_NAME));
+  }
+
+  @Override
+  protected JsonNode getFailCheckConfig() throws Exception {
+    final JsonNode clone = Jsons.clone(getConfig());
+    ((ObjectNode) clone).put("password", "wrong password");
+    return clone;
+  }
+
+  @Override
+  protected List<JsonNode> retrieveNormalizedRecords(final TestDestinationEnv testEnv,
+                                                     final String streamName,
+                                                     final String namespace)
+      throws Exception {
+    return retrieveRecordsFromTable(namingResolver.getIdentifier(streamName), namespace);
+  }
+
+  @Override
+  protected List<JsonNode> retrieveRecords(TestDestinationEnv testEnv,
+                                           String streamName,
+                                           String namespace,
+                                           JsonNode streamSchema)
+      throws Exception {
+    return retrieveRecordsFromTable(namingResolver.getRawTableName(streamName), namespace)
+        .stream()
+        .map(r -> Jsons.deserialize(r.get(JavaBaseConstants.COLUMN_NAME_DATA).asText()))
+        .collect(Collectors.toList());
+  }
+
+  private List<JsonNode> retrieveRecordsFromTable(final String tableName, final String schemaName) throws Exception {
+    return SshTunnel.sshWrap(
+        getConfig(),
+        ClickhouseDestination.HOST_KEY,
+        ClickhouseDestination.PORT_KEY,
+        (CheckedFunction<JsonNode, List<JsonNode>, Exception>) mangledConfig -> getDatabase(mangledConfig)
+            .query(String.format("SELECT * FROM %s.%s ORDER BY %s ASC", schemaName, tableName,
+                JavaBaseConstants.COLUMN_NAME_EMITTED_AT))
+            .collect(Collectors.toList()));
+  }
+
+  @Override
+  protected List<String> resolveIdentifier(final String identifier) {
+    final List<String> result = new ArrayList<>();
+    final String resolved = namingResolver.getIdentifier(identifier);
+    result.add(identifier);
+    result.add(resolved);
+    if (!resolved.startsWith("\"")) {
+      result.add(resolved.toLowerCase());
+      result.add(resolved.toUpperCase());
+    }
+    return result;
+  }
+
+  private static JdbcDatabase getDatabase(final JsonNode config) {
+    return Databases.createJdbcDatabase(
+        config.get("username").asText(),
+        config.has("password") ? config.get("password").asText() : null,
+        String.format("jdbc:clickhouse://%s:%s/%s",
+            config.get("host").asText(),
+            config.get("port").asText(),
+            config.get("database").asText()),
+        ClickhouseDestination.DRIVER_CLASS);
+  }
+
+  @Override
+  protected void setup(TestDestinationEnv testEnv) {
+    bastion.initAndStartBastion();
+    db = (ClickHouseContainer) new ClickHouseContainer("yandex/clickhouse-server").withNetwork(bastion.getNetWork());
+    db.start();
+  }
+
+  @Override
+  protected void tearDown(TestDestinationEnv testEnv) {
+    bastion.stopAndCloseContainers(db);
+  }
+
+  /**
+   * The SQL script generated by the old version of dbt in the 'test' step isn't compatible with
+   * ClickHouse, so we skip this test for now.
+   *
+   * Ref: https://github.com/dbt-labs/dbt-core/issues/3905
+   *
+   * @throws Exception
+   */
+  @Disabled
+  public void testCustomDbtTransformations() throws Exception {
+    super.testCustomDbtTransformations();
+  }
+
+  @Disabled
+  public void testCustomDbtTransformationsFailure() throws Exception {}
+
+  /**
+   * The normalization container needs the native port, while the destination container needs the
+   * HTTP port; we can't inject the port switch into the DestinationAcceptanceTest.runSync() method
+   * for this test, so we skip it.
+   *
+   * @throws Exception
+   */
+  @Disabled
+  public void testIncrementalDedupeSync() throws Exception {
+    super.testIncrementalDedupeSync();
+  }
+
+  /**
+   * The normalization container needs the native port, while the destination container needs the
+   * HTTP port; we can't inject the port switch into the DestinationAcceptanceTest.runSync() method
+   * for this test, so we skip it.
+   *
+   * @throws Exception
+   */
+  @Disabled
+  public void testSyncWithNormalization(final String messagesFilename, final String catalogFilename) throws Exception {
+    super.testSyncWithNormalization(messagesFilename, catalogFilename);
+  }
+
+}
diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshKeyClickhouseDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshKeyClickhouseDestinationAcceptanceTest.java
new file mode 100644
index 0000000000000..7fd4f32699855
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshKeyClickhouseDestinationAcceptanceTest.java
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2021 Airbyte, Inc., all rights reserved.
+ */ + +package io.airbyte.integrations.destination.clickhouse; + +import io.airbyte.integrations.base.ssh.SshTunnel; + +public class SshKeyClickhouseDestinationAcceptanceTest extends SshClickhouseDestinationAcceptanceTest { + + @Override + public SshTunnel.TunnelMethod getTunnelMethod() { + return SshTunnel.TunnelMethod.SSH_KEY_AUTH; + } + +} diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshPasswordClickhouseDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshPasswordClickhouseDestinationAcceptanceTest.java new file mode 100644 index 0000000000000..1bb01fb490d4d --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshPasswordClickhouseDestinationAcceptanceTest.java @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.clickhouse; + +import io.airbyte.integrations.base.ssh.SshTunnel; + +public class SshPasswordClickhouseDestinationAcceptanceTest extends SshClickhouseDestinationAcceptanceTest { + + @Override + public SshTunnel.TunnelMethod getTunnelMethod() { + return SshTunnel.TunnelMethod.SSH_PASSWORD_AUTH; + } + +} diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationTest.java new file mode 100644 index 0000000000000..edad4528f29b8 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationTest.java @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */
+
+package io.airbyte.integrations.destination.clickhouse;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.collect.ImmutableMap;
+import io.airbyte.commons.json.Jsons;
+import io.airbyte.db.Databases;
+import io.airbyte.db.jdbc.JdbcDatabase;
+import io.airbyte.db.jdbc.JdbcUtils;
+import io.airbyte.integrations.base.AirbyteMessageConsumer;
+import io.airbyte.integrations.base.Destination;
+import io.airbyte.integrations.destination.ExtendedNameTransformer;
+import io.airbyte.protocol.models.AirbyteMessage;
+import io.airbyte.protocol.models.AirbyteMessage.Type;
+import io.airbyte.protocol.models.AirbyteRecordMessage;
+import io.airbyte.protocol.models.AirbyteStateMessage;
+import io.airbyte.protocol.models.CatalogHelpers;
+import io.airbyte.protocol.models.ConfiguredAirbyteCatalog;
+import io.airbyte.protocol.models.Field;
+import io.airbyte.protocol.models.JsonSchemaPrimitive;
+import java.time.Instant;
+import java.util.Comparator;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.testcontainers.containers.ClickHouseContainer;
+
+public class ClickhouseDestinationTest {
+
+  private static final String DB_NAME = "default";
+  private static final String STREAM_NAME = "id_and_name";
+  private static final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer();
+
+  private static ClickHouseContainer db;
+  private static ConfiguredAirbyteCatalog catalog;
+  private static JsonNode config;
+
+  @BeforeAll
+  static void init() {
+    db = new ClickHouseContainer("yandex/clickhouse-server");
+    db.start();
+  }
+
+  @BeforeEach
+  void setup() {
+    catalog = new ConfiguredAirbyteCatalog().withStreams(List.of(
+        CatalogHelpers.createConfiguredAirbyteStream(
+            STREAM_NAME,
+            DB_NAME,
+            Field.of("id", JsonSchemaPrimitive.NUMBER),
+            Field.of("name", JsonSchemaPrimitive.STRING))));
+
+    config = Jsons.jsonNode(ImmutableMap.builder()
+        .put("host", db.getHost())
+        .put("port", db.getFirstMappedPort())
+        .put("database", DB_NAME)
+        .put("username", db.getUsername())
+        .put("password", db.getPassword())
+        .put("schema", DB_NAME)
+        .put("ssl", false)
+        .build());
+  }
+
+  @AfterAll
+  static void cleanUp() {
+    db.stop();
+    db.close();
+  }
+
+  @Test
+  void sanityTest() throws Exception {
+    final Destination dest = new ClickhouseDestination();
+    final AirbyteMessageConsumer consumer = dest.getConsumer(config, catalog,
+        Destination::defaultOutputRecordCollector);
+    final List<AirbyteMessage> expectedRecords = generateRecords(10);
+
+    consumer.start();
+    expectedRecords.forEach(m -> {
+      try {
+        consumer.accept(m);
+      } catch (final Exception e) {
+        throw new RuntimeException(e);
+      }
+    });
+    consumer.accept(new AirbyteMessage()
+        .withType(Type.STATE)
+        .withState(new AirbyteStateMessage()
+            .withData(Jsons.jsonNode(ImmutableMap.of(DB_NAME + "." + STREAM_NAME, 10)))));
+    consumer.close();
+
+    final JdbcDatabase database = Databases.createJdbcDatabase(
+        config.get("username").asText(),
+        config.get("password").asText(),
+        String.format("jdbc:clickhouse://%s:%s/%s",
+            config.get("host").asText(),
+            config.get("port").asText(),
+            config.get("database").asText()),
+        ClickhouseDestination.DRIVER_CLASS);
+
+    final List<JsonNode> actualRecords = database.bufferedResultSetQuery(
+        connection -> connection.createStatement().executeQuery(
+            String.format("SELECT * FROM %s.%s;", DB_NAME,
+                namingResolver.getRawTableName(STREAM_NAME))),
+        JdbcUtils.getDefaultSourceOperations()::rowToJson);
+
+    assertEquals(
+        expectedRecords.stream().map(AirbyteMessage::getRecord)
+            .map(AirbyteRecordMessage::getData).collect(Collectors.toList()),
+        actualRecords.stream()
+            .map(o -> o.get("_airbyte_data").asText())
+            .map(Jsons::deserialize)
+            .sorted(Comparator.comparingInt(x -> x.get("id").asInt()))
+            .collect(Collectors.toList()));
+  }
+
+  private List<AirbyteMessage> generateRecords(final int n) {
+    return IntStream.range(0, n)
+        .boxed()
+        .map(i -> new AirbyteMessage()
+            .withType(Type.RECORD)
+            .withRecord(new AirbyteRecordMessage()
+                .withStream(STREAM_NAME)
+                .withNamespace(DB_NAME)
+                .withEmittedAt(Instant.now().toEpochMilli())
+                .withData(Jsons.jsonNode(ImmutableMap.of("id", i, "name", "test name " + i)))))
+        .collect(Collectors.toList());
+  }
+
+}
diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/DefaultNormalizationRunner.java b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/DefaultNormalizationRunner.java
index 3a287286c01b7..2a06941fd6713 100644
--- a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/DefaultNormalizationRunner.java
+++ b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/DefaultNormalizationRunner.java
@@ -47,7 +47,8 @@ public enum DestinationType {
     ORACLE,
     POSTGRES,
     REDSHIFT,
-    SNOWFLAKE
+    SNOWFLAKE,
+    CLICKHOUSE
   }
 
   public DefaultNormalizationRunner(final DestinationType destinationType, final ProcessFactory processFactory, final String normalizationImageName) {
diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java
index 687527706bd59..a71b55a23319a 100644
--- a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java
+++ b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java
@@ -30,6 +30,8 @@ public class NormalizationRunnerFactory {
       .put("airbyte/destination-postgres-strict-encrypt", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.POSTGRES))
       .put("airbyte/destination-redshift", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.REDSHIFT))
       .put("airbyte/destination-snowflake", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.SNOWFLAKE))
+      .put("airbyte/destination-clickhouse", ImmutablePair.of("airbyte/normalization-clickhouse", DestinationType.CLICKHOUSE))
+      .put("airbyte/destination-clickhouse-strict-encrypt", ImmutablePair.of("airbyte/normalization-clickhouse", DestinationType.CLICKHOUSE))
       .build();
 
   public static NormalizationRunner create(final String connectorImageName, final ProcessFactory processFactory) {
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index cef705c7ea2a2..c2026efcf36b8 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -159,6 +159,7 @@
 * [Amazon SQS](integrations/destinations/amazon-sqs.md)
 * [AzureBlobStorage](integrations/destinations/azureblobstorage.md)
 * [BigQuery](integrations/destinations/bigquery.md)
+ * [ClickHouse](integrations/destinations/clickhouse.md)
 * [Databricks](integrations/destinations/databricks.md)
 * [DynamoDB](integrations/destinations/dynamodb.md)
 * [Elasticsearch](integrations/destinations/elasticsearch.md)
diff --git a/docs/integrations/README.md b/docs/integrations/README.md
index f406cad9cbfb0..6c3c1b8323137 100644
--- a/docs/integrations/README.md
+++ b/docs/integrations/README.md
@@ -138,6 +138,7 @@ Airbyte uses a grading system for connectors to help users understand what to ex
 | [AzureBlobStorage](destinations/azureblobstorage.md) | Alpha |
 | [BigQuery](destinations/bigquery.md) | Certified |
 | [Chargify \(Keen\)](destinations/chargify.md) | Alpha |
+| [ClickHouse](destinations/clickhouse.md) | Alpha |
 | [Databricks](destinations/databricks.md) | Beta |
 | [Elasticsearch](destinations/elasticsearch.md) | Alpha |
 | [Google Cloud Storage \(GCS\)](destinations/gcs.md) | Alpha |
diff --git a/docs/integrations/destinations/clickhouse.md b/docs/integrations/destinations/clickhouse.md
new file mode 100644
index 0000000000000..9312e173964b2
--- /dev/null
+++ b/docs/integrations/destinations/clickhouse.md
@@ -0,0 +1,83 @@
+
+# ClickHouse
+
+## Features
+
+| Feature | Supported?\(Yes/No\) | Notes |
+| :--- | :--- | :--- |
+| Full Refresh Sync | Yes | |
+| Incremental - Append Sync | Yes | |
+| Incremental - Deduped History | Yes | |
+| Namespaces | Yes | |
+
+#### Output Schema
+
+Each stream will be output into its own table in ClickHouse. Each table will contain 3 columns:
+
+* `_airbyte_ab_id`: a UUID assigned by Airbyte to each event that is processed. The column type in ClickHouse is `String`.
+* `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. The column type in ClickHouse is `DateTime64`.
+* `_airbyte_data`: a JSON blob containing the event data. The column type in ClickHouse is `String`.
+
+## Getting Started \(Airbyte Cloud\)
+
+Airbyte Cloud only supports connecting to your ClickHouse instance with SSL or TLS encryption, which is supported by the [ClickHouse JDBC driver](https://github.com/ClickHouse/clickhouse-jdbc).
+
+## Getting Started \(Airbyte Open-Source\)
+
+#### Requirements
+
+To use the ClickHouse destination, you'll need:
+
+* A ClickHouse server version 21.8.10.19 or above
+
+#### Configure Network Access
+
+Make sure your ClickHouse database can be accessed by Airbyte. If your database is within a VPC, you may need to allow access from the IP you're using to expose Airbyte.
+
+#### **Permissions**
+
+You need a ClickHouse user with the following permissions:
+
+* can create tables and write rows
+* can create databases
+
+You can create such a user by running, e.g.:
+
+```
+GRANT CREATE ON * TO airbyte_user;
+```
+
+You can also use a pre-existing user, but we highly recommend creating a dedicated user for Airbyte.
+
+#### Target Database
+
+You will need to choose an existing database or create a new database that will be used to store synced data from Airbyte.
+
+### Set up the ClickHouse Destination in Airbyte
+
+You should now have all the requirements needed to configure ClickHouse as a destination in the UI. You'll need the following information to configure the ClickHouse destination:
+
+* **Host**
+* **Port** (JDBC HTTP port, not the native port)
+* **Username**
+* **Password**
+* **Database**
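+
+For reference, the connector combines these values into a JDBC URL for the ClickHouse HTTP interface. A rough sketch of the resulting URL, based on the connector code in this pull request (the `ssl=true&sslmode=none` parameters are appended unless SSL is explicitly disabled):
+
+```
+jdbc:clickhouse://<host>:<port>/<database>?ssl=true&sslmode=none
+```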
+
+## Naming Conventions
+
+From [ClickHouse SQL Identifiers syntax](https://clickhouse.com/docs/en/sql-reference/syntax/):
+
+* SQL identifiers and key words must begin with a letter \(a-z, but also letters with diacritical marks and non-Latin letters\) or an underscore \(\_\).
+* Subsequent characters in an identifier or key word can be letters, underscores, or digits \(0-9\).
+* Identifiers can be quoted or non-quoted. The latter is preferred.
+* If you want to use identifiers that are the same as keywords, or to use other symbols in identifiers, quote them using double quotes or backticks, for example, "id", `id`.
+* If you want to write portable applications, you are advised to either always quote a particular name or never quote it.
+
+Therefore, the Airbyte ClickHouse destination creates tables and schemas using unquoted identifiers when possible, falling back to quoted identifiers when names contain special characters.
+
+## Changelog
+
+| Version | Date | Pull Request | Subject |
+| :--- | :--- | :--- | :--- |
+| 0.1.0 | 2021-11-04 | [\#7620](https://github.com/airbytehq/airbyte/pull/7620) | Add ClickHouse destination |