Skip to content

Commit bbcd461

Browse files
burmecia, alexandertsukanov, marcosmarxm
authored
🎉 New Destination: ClickHouse (#7620)
* add ClickHouse destination * update docs * format code * code improvement as per code review * add ssh tunneling and ssl/tls support and code enhancement * merge from master * disable testCustomDbtTransformationsFailure test * fix string format bug * fix reserved keywords bug and disable dbt * disable dbt in expect result * add type hints * bump connector version Co-authored-by: Alexander Tsukanov <alexander.tsukanovvv@gmail.com> Co-authored-by: Marcos Marx <marcosmarxm@gmail.com>
1 parent cba7285 commit bbcd461

File tree

101 files changed

+5675
-20
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

101 files changed

+5675
-20
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"destinationDefinitionId": "ce0d828e-1dc4-496c-b122-2da42e637e48",
3+
"name": "Clickhouse",
4+
"dockerRepository": "airbyte/destination-clickhouse",
5+
"dockerImageTag": "0.1.0",
6+
"documentationUrl": "https://docs.airbyte.io/integrations/destinations/clickhouse"
7+
}

airbyte-config/init/src/main/resources/seed/destination_definitions.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@
3434
dockerImageTag: 0.2.0
3535
documentationUrl: https://docs.airbyte.io/integrations/destinations/keen
3636
icon: chargify.svg
37+
- name: Clickhouse
38+
destinationDefinitionId: ce0d828e-1dc4-496c-b122-2da42e637e48
39+
dockerRepository: airbyte/destination-clickhouse
40+
dockerImageTag: 0.1.0
41+
documentationUrl: https://docs.airbyte.io/integrations/destinations/clickhouse
3742
- name: DynamoDB
3843
destinationDefinitionId: 8ccd8909-4e99-4141-b48d-4984b70b2d89
3944
dockerRepository: airbyte/destination-dynamodb

airbyte-config/init/src/main/resources/seed/destination_specs.yaml

+159
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,165 @@
525525
supported_destination_sync_modes:
526526
- "overwrite"
527527
- "append"
528+
- dockerImage: "airbyte/destination-clickhouse:0.1.0"
529+
spec:
530+
documentationUrl: "https://docs.airbyte.io/integrations/destinations/clickhouse"
531+
connectionSpecification:
532+
$schema: "http://json-schema.org/draft-07/schema#"
533+
title: "ClickHouse Destination Spec"
534+
type: "object"
535+
required:
536+
- "host"
537+
- "port"
538+
- "database"
539+
- "username"
540+
additionalProperties: true
541+
properties:
542+
host:
543+
title: "Host"
544+
description: "Hostname of the database."
545+
type: "string"
546+
order: 0
547+
port:
548+
title: "Port"
549+
description: "JDBC port (not the native port) of the database."
550+
type: "integer"
551+
minimum: 0
552+
maximum: 65535  # highest valid TCP port number (16-bit); 65536 is out of range
553+
default: 8123
554+
examples:
555+
- "8123"
556+
order: 1
557+
database:
558+
title: "DB Name"
559+
description: "Name of the database."
560+
type: "string"
561+
order: 2
562+
username:
563+
title: "User"
564+
description: "Username to use to access the database."
565+
type: "string"
566+
order: 3
567+
password:
568+
title: "Password"
569+
description: "Password associated with the username."
570+
type: "string"
571+
airbyte_secret: true
572+
order: 4
573+
ssl:
574+
title: "SSL Connection"
575+
description: "Encrypt data using SSL."
576+
type: "boolean"
577+
default: false
578+
order: 5
579+
tunnel_method:
580+
type: "object"
581+
title: "SSH Tunnel Method"
582+
description: "Whether to initiate an SSH tunnel before connecting to the\
583+
\ database, and if so, which kind of authentication to use."
584+
oneOf:
585+
- title: "No Tunnel"
586+
required:
587+
- "tunnel_method"
588+
properties:
589+
tunnel_method:
590+
description: "No ssh tunnel needed to connect to database"
591+
type: "string"
592+
const: "NO_TUNNEL"
593+
order: 0
594+
- title: "SSH Key Authentication"
595+
required:
596+
- "tunnel_method"
597+
- "tunnel_host"
598+
- "tunnel_port"
599+
- "tunnel_user"
600+
- "ssh_key"
601+
properties:
602+
tunnel_method:
603+
description: "Connect through a jump server tunnel host using username\
604+
\ and ssh key"
605+
type: "string"
606+
const: "SSH_KEY_AUTH"
607+
order: 0
608+
tunnel_host:
609+
title: "SSH Tunnel Jump Server Host"
610+
description: "Hostname of the jump server host that allows inbound\
611+
\ ssh tunnel."
612+
type: "string"
613+
order: 1
614+
tunnel_port:
615+
title: "SSH Connection Port"
616+
description: "Port on the proxy/jump server that accepts inbound ssh\
617+
\ connections."
618+
type: "integer"
619+
minimum: 0
620+
maximum: 65535  # highest valid TCP port number (16-bit); 65536 is out of range
621+
default: 22
622+
examples:
623+
- "22"
624+
order: 2
625+
tunnel_user:
626+
title: "SSH Login Username"
627+
description: "OS-level username for logging into the jump server host."
628+
type: "string"
629+
order: 3
630+
ssh_key:
631+
title: "SSH Private Key"
632+
description: "OS-level user account ssh key credentials in RSA PEM\
633+
\ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )"
634+
type: "string"
635+
airbyte_secret: true
636+
multiline: true
637+
order: 4
638+
- title: "Password Authentication"
639+
required:
640+
- "tunnel_method"
641+
- "tunnel_host"
642+
- "tunnel_port"
643+
- "tunnel_user"
644+
- "tunnel_user_password"
645+
properties:
646+
tunnel_method:
647+
description: "Connect through a jump server tunnel host using username\
648+
\ and password authentication"
649+
type: "string"
650+
const: "SSH_PASSWORD_AUTH"
651+
order: 0
652+
tunnel_host:
653+
title: "SSH Tunnel Jump Server Host"
654+
description: "Hostname of the jump server host that allows inbound\
655+
\ ssh tunnel."
656+
type: "string"
657+
order: 1
658+
tunnel_port:
659+
title: "SSH Connection Port"
660+
description: "Port on the proxy/jump server that accepts inbound ssh\
661+
\ connections."
662+
type: "integer"
663+
minimum: 0
664+
maximum: 65535  # highest valid TCP port number (16-bit); 65536 is out of range
665+
default: 22
666+
examples:
667+
- "22"
668+
order: 2
669+
tunnel_user:
670+
title: "SSH Login Username"
671+
description: "OS-level username for logging into the jump server host"
672+
type: "string"
673+
order: 3
674+
tunnel_user_password:
675+
title: "Password"
676+
description: "OS-level password for logging into the jump server host"
677+
type: "string"
678+
airbyte_secret: true
679+
order: 4
680+
supportsIncremental: true
681+
supportsNormalization: true
682+
supportsDBT: false
683+
supported_destination_sync_modes:
684+
- "overwrite"
685+
- "append"
686+
- "append_dedup"
528687
- dockerImage: "airbyte/destination-dynamodb:0.1.0"
529688
spec:
530689
documentationUrl: "https://docs.airbyte.io/integrations/destinations/dynamodb"

airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java

+4
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,10 @@ public static Database createOracleDatabase(final String username, final String
7575
return createDatabase(username, password, jdbcConnectionString, "oracle.jdbc.OracleDriver", SQLDialect.DEFAULT);
7676
}
7777

78+
/**
 * Creates a {@link Database} for ClickHouse by delegating to {@code createDatabase} with the
 * {@code ru.yandex.clickhouse.ClickHouseDriver} driver class and {@code SQLDialect.DEFAULT}.
 *
 * @param username user name passed through to {@code createDatabase}
 * @param password password passed through to {@code createDatabase}
 * @param jdbcConnectionString JDBC connection string passed through to {@code createDatabase}
 * @return the {@link Database} returned by {@code createDatabase}
 */
public static Database createClickhouseDatabase(final String username, final String password, final String jdbcConnectionString) {
79+
return createDatabase(username, password, jdbcConnectionString, "ru.yandex.clickhouse.ClickHouseDriver", SQLDialect.DEFAULT);
80+
}
81+
7882
public static Database createMariaDbDatabase(final String username, final String password, final String jdbcConnectionString) {
7983
return createDatabase(username, password, jdbcConnectionString, "org.mariadb.jdbc.Driver", SQLDialect.MARIADB);
8084
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
FROM fishtownanalytics/dbt:0.21.0
2+
COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte
3+
4+
# Install SSH Tunneling dependencies; remove the apt package lists in the same
# layer so they are not baked into the image.
5+
RUN apt-get update && apt-get install -y jq sshpass && rm -rf /var/lib/apt/lists/*
6+
WORKDIR /airbyte
7+
COPY entrypoint.sh .
8+
COPY build/sshtunneling.sh .
9+
10+
WORKDIR /airbyte/normalization_code
11+
COPY normalization ./normalization
12+
COPY setup.py .
13+
COPY dbt-project-template/ ./dbt-template/
14+
15+
# Install python dependencies
16+
WORKDIR /airbyte/base_python_structs
17+
RUN pip install .
18+
19+
WORKDIR /airbyte/normalization_code
20+
RUN pip install .
21+
22+
WORKDIR /airbyte/normalization_code/dbt-template/
23+
#RUN pip install dbt-clickhouse
24+
# dbt-clickhouse adapter has some bugs, use our own just for now
25+
# https://github.com/silentsokolov/dbt-clickhouse/issues/20
# NOTE(review): the install below has no tag/commit pin, so it presumably tracks
# the fork's default branch at build time - consider pinning with @<ref> for
# reproducible images.
26+
RUN pip install git+https://github.com/burmecia/dbt-clickhouse.git
27+
# Download external dbt dependencies
28+
RUN dbt deps
29+
30+
WORKDIR /airbyte
31+
ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
32+
ENTRYPOINT ["/airbyte/entrypoint.sh"]
33+
34+
LABEL io.airbyte.name=airbyte/normalization-clickhouse
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# This file is necessary to install dbt-utils with dbt deps
2+
# the content will be overwritten by the transform function
3+
4+
# Name your package! Package names should contain only lowercase characters
5+
# and underscores. A good package name should reflect your organization's
6+
# name or the intended use of these models
7+
name: 'airbyte_utils'
8+
version: '1.0'
9+
config-version: 2
10+
11+
# This setting configures which "profile" dbt uses for this project. Profiles contain
12+
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
13+
profile: 'normalize'
14+
15+
# These configurations specify where dbt should look for different types of files.
16+
# The `source-paths` config, for example, states that source models can be found
17+
# in the "models/" directory. You probably won't need to change these!
18+
source-paths: ["models"]
19+
docs-paths: ["docs"]
20+
analysis-paths: ["analysis"]
21+
test-paths: ["tests"]
22+
data-paths: ["data"]
23+
macro-paths: ["macros"]
24+
25+
target-path: "../build" # directory which will store compiled SQL files
26+
log-path: "../logs" # directory which will store DBT logs
27+
modules-path: "/tmp/dbt_modules" # directory which will store external DBT dependencies
28+
29+
clean-targets: # directories to be removed by `dbt clean`
30+
- "build"
31+
- "dbt_modules"
32+
33+
quoting:
34+
database: true
35+
# Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
36+
# all schemas should be unquoted
37+
schema: false
38+
identifier: true
39+
40+
# You can define configurations for models in the `source-paths` directory here.
41+
# Using these configurations, you can enable or disable models, change how they
42+
# are materialized, and more!
43+
models:
44+
airbyte_utils:
45+
+materialized: table
46+
generated:
47+
airbyte_ctes:
48+
+tags: airbyte_internal_cte
49+
+materialized: ephemeral
50+
airbyte_incremental:
51+
+tags: incremental_tables
52+
+materialized: incremental
53+
# schema change test isn't supported in ClickHouse yet
54+
+on_schema_change: "ignore"
55+
airbyte_tables:
56+
+tags: normalized_tables
57+
+materialized: table
58+
airbyte_views:
59+
+tags: airbyte_internal_views
60+
+materialized: view
61+
62+
dispatch:
63+
- macro_namespace: dbt_utils
64+
search_order: ['airbyte_utils', 'dbt_utils']
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# add dependencies. these will get pulled during the `dbt deps` process.
2+
3+
packages:
4+
- git: "https://github.com/fishtown-analytics/dbt-utils.git"
5+
revision: 0.7.3

airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql

+37
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@
3232
VARCHAR(max)
3333
{%- endmacro -%}
3434

35+
{% macro clickhouse__type_json() %}
36+
String
37+
{% endmacro %}
38+
3539

3640
{# string ------------------------------------------------- #}
3741

@@ -47,6 +51,10 @@
4751
VARCHAR(max)
4852
{%- endmacro -%}
4953

54+
{%- macro clickhouse__type_string() -%}
55+
String
56+
{%- endmacro -%}
57+
5058

5159
{# float ------------------------------------------------- #}
5260
{% macro mysql__type_float() %}
@@ -57,6 +65,10 @@
5765
float
5866
{% endmacro %}
5967

68+
{% macro clickhouse__type_float() %}
69+
Float64
70+
{% endmacro %}
71+
6072

6173
{# int ------------------------------------------------- #}
6274
{% macro default__type_int() %}
@@ -67,6 +79,11 @@
6779
int
6880
{% endmacro %}
6981

82+
{% macro clickhouse__type_int() %}
83+
INT
84+
{% endmacro %}
85+
86+
7087
{# bigint ------------------------------------------------- #}
7188
{% macro mysql__type_bigint() %}
7289
signed
@@ -76,12 +93,20 @@
7693
numeric
7794
{% endmacro %}
7895

96+
{% macro clickhouse__type_bigint() %}
97+
BIGINT
98+
{% endmacro %}
99+
79100

80101
{# numeric ------------------------------------------------- --#}
81102
{% macro mysql__type_numeric() %}
82103
float
83104
{% endmacro %}
84105

106+
{% macro clickhouse__type_numeric() %}
107+
Float64
108+
{% endmacro %}
109+
85110

86111
{# timestamp ------------------------------------------------- --#}
87112
{% macro mysql__type_timestamp() %}
@@ -94,6 +119,10 @@
94119
datetime
95120
{%- endmacro -%}
96121

122+
{% macro clickhouse__type_timestamp() %}
123+
DateTime64
124+
{% endmacro %}
125+
97126

98127
{# timestamp with time zone ------------------------------------------------- #}
99128

@@ -124,6 +153,10 @@
124153
datetime
125154
{%- endmacro -%}
126155

156+
{% macro clickhouse__type_timestamp_with_timezone() %}
157+
DateTime64
158+
{% endmacro %}
159+
127160

128161
{# date ------------------------------------------------- #}
129162

@@ -142,3 +175,7 @@
142175
{%- macro sqlserver__type_date() -%}
143176
date
144177
{%- endmacro -%}
178+
179+
{% macro clickhouse__type_date() %}
180+
Date
181+
{% endmacro %}

0 commit comments

Comments
 (0)