From b3715d8324ef14c6c6de78a6a91cdd29e411bce7 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 9 May 2022 13:13:12 -0700 Subject: [PATCH 01/43] first pass at deletions --- .../transform_catalog/stream_processor.py | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 53c3ea73aba2b..7361a64d88da3 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -297,7 +297,13 @@ def process(self) -> List["StreamProcessor"]: unique_key=self.name_transformer.normalize_column_name(f"{self.airbyte_unique_key}_scd"), partition_by=PartitionScheme.ACTIVE_ROW, ) - where_clause = f"\nand {self.name_transformer.normalize_column_name('_airbyte_active_row')} = 1" + where_clause = f"""\nand ( + {self.name_transformer.normalize_column_name('_airbyte_active_row')} = 1 + or + cast({self.name_transformer.normalize_column_name(self.airbyte_normalized_at)} as {{{{ type_timestamp_with_timezone() }}}}) + >= + (select max(cast({self.name_transformer.normalize_column_name(self.airbyte_normalized_at)} as {{{{ type_timestamp_with_timezone() }}}})) from {{{{ this }}}}) +)""" # from_table should not use the de-duplicated final table or tables downstream (nested streams) will miss non active rows self.add_to_outputs( self.generate_final_model(from_table, column_names, self.get_unique_key()) + where_clause, @@ -305,6 +311,8 @@ def process(self) -> List["StreamProcessor"]: is_intermediate=False, unique_key=self.get_unique_key(), partition_by=PartitionScheme.UNIQUE_KEY, + do_deletions=True, + scd_table=from_table, ) return self.find_children_streams(from_table, column_names) @@ -1060,6 +1068,8 @@ def 
add_to_outputs( unique_key: str = "", subdir: str = "", partition_by: PartitionScheme = PartitionScheme.DEFAULT, + do_deletions: bool = False, + scd_table: str = "", ) -> str: schema = self.get_schema(is_intermediate) # MySQL table names need to be manually truncated, because it does not do it automatically @@ -1095,6 +1105,23 @@ def add_to_outputs( else: # incremental is handled in the SCD SQL already sql = self.add_incremental_clause(sql) + if do_deletions: + # TODO figure out how to get SCD table name correctly + config[ + "post_hook" + ] = f"""[" +delete from {{{{ this }}}} +where _airbyte_ab_id in ( + select _airbyte_ab_id + from {{{{ {scd_table} }}}} + where + _airbyte_active_row = 0 + and + cast({self.name_transformer.normalize_column_name(self.airbyte_normalized_at)} as {{{{ type_timestamp_with_timezone() }}}}) + >= + (select max(cast({self.name_transformer.normalize_column_name(self.airbyte_normalized_at)} as {{{{ type_timestamp_with_timezone() }}}})) from {{{{ {scd_table} }}}}) +) + "]""" template = Template( """ {{ '{{' }} config( From ce8970b0eeb9cf90340fb2221ea9ed5c2b08d21e Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Thu, 12 May 2022 16:33:12 -0700 Subject: [PATCH 02/43] wip do deletions --- .../transform_catalog/stream_processor.py | 68 ++++++++++++------- 1 file changed, 42 insertions(+), 26 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 7361a64d88da3..58b4ef404227e 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -297,16 +297,9 @@ def process(self) -> List["StreamProcessor"]: unique_key=self.name_transformer.normalize_column_name(f"{self.airbyte_unique_key}_scd"), 
partition_by=PartitionScheme.ACTIVE_ROW, ) - where_clause = f"""\nand ( - {self.name_transformer.normalize_column_name('_airbyte_active_row')} = 1 - or - cast({self.name_transformer.normalize_column_name(self.airbyte_normalized_at)} as {{{{ type_timestamp_with_timezone() }}}}) - >= - (select max(cast({self.name_transformer.normalize_column_name(self.airbyte_normalized_at)} as {{{{ type_timestamp_with_timezone() }}}})) from {{{{ this }}}}) -)""" # from_table should not use the de-duplicated final table or tables downstream (nested streams) will miss non active rows self.add_to_outputs( - self.generate_final_model(from_table, column_names, self.get_unique_key()) + where_clause, + self.generate_final_model(from_table, column_names, self.get_unique_key(), remove_duplicates_and_deletions=True), self.get_model_materialization_mode(is_intermediate=False, column_count=column_count), is_intermediate=False, unique_key=self.get_unique_key(), @@ -1001,9 +994,11 @@ def get_primary_key_from_path(self, column_names: Dict[str, Tuple[str, str]], pa else: raise ValueError(f"No path specified for stream {self.stream_name}") - def generate_final_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]], unique_key: str = "") -> Any: - template = Template( - """ + def generate_final_model( + self, from_table: str, column_names: Dict[str, Tuple[str, str]], unique_key: str = "", remove_duplicates_and_deletions=False + ) -> Any: + outer_query_source_table = jinja_call(from_table) + query = """ -- Final base SQL model -- depends_on: {{ from_table }} select @@ -1020,11 +1015,41 @@ def generate_final_model(self, from_table: str, column_names: Dict[str, Tuple[st {{ col_emitted_at }}, {{ '{{ current_timestamp() }}' }} as {{ col_normalized_at }}, {{ hash_id }} -from {{ from_table }} +from {{ outer_query_source_table }} {{ sql_table_comment }} where 1 = 1 """ - ) + if remove_duplicates_and_deletions: + # TODO better name for this table? 
+ outer_query_source_table = "partitioned_data" + # TODO do this incrementally to avoid scanning the whole table + query = ( + """ +with partitioned_data as ( + select row_number() over ( + partition by _airbyte_unique_key + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ from_table }}.* + from {{ from_table }} +) + """ + + query + # Note that {{ '{{ type_timestamp_with_timezone() }}' }} resolves to a jinja macro, which is resolved by DBT to a SQL type + + """\nand ( +{{ normalized_active_row_column_name }} = 1 +or ( +cast({{ normalized_normalized_at_column_name }} as {{ '{{ type_timestamp_with_timezone() }}' }}) + >= + (select max(cast({{ normalized_normalized_at_column_name }} as {{ '{{ type_timestamp_with_timezone() }}' }})) from {{ from_table }}) +and +_airbyte_end_at is null +and +_airbyte_row_num = 1 +) +)""" + ) + template = Template(query) sql = template.render( col_ab_id=self.get_ab_id(), col_emitted_at=self.get_emitted_at(), @@ -1035,6 +1060,9 @@ def generate_final_model(self, from_table: str, column_names: Dict[str, Tuple[st from_table=jinja_call(from_table), sql_table_comment=self.sql_table_comment(include_from_table=True), unique_key=unique_key, + normalized_active_row_column_name=self.name_transformer.normalize_column_name("_airbyte_active_row"), + normalized_normalized_at_column_name=self.name_transformer.normalize_column_name(self.airbyte_normalized_at), + outer_query_source_table=outer_query_source_table, ) return sql @@ -1109,19 +1137,7 @@ def add_to_outputs( # TODO figure out how to get SCD table name correctly config[ "post_hook" - ] = f"""[" -delete from {{{{ this }}}} -where _airbyte_ab_id in ( - select _airbyte_ab_id - from {{{{ {scd_table} }}}} - where - _airbyte_active_row = 0 - and - cast({self.name_transformer.normalize_column_name(self.airbyte_normalized_at)} as {{{{ type_timestamp_with_timezone() }}}}) - >= - (select max(cast({self.name_transformer.normalize_column_name(self.airbyte_normalized_at)} as 
{{{{ type_timestamp_with_timezone() }}}})) from {{{{ {scd_table} }}}}) -) - "]""" + ] = f"""["delete from {{{{ this }}}} where _airbyte_ab_id in ( select _airbyte_ab_id from {{{{ {scd_table} }}}} where _airbyte_active_row = 0 and cast({self.name_transformer.normalize_column_name(self.airbyte_normalized_at)} as {{{{ type_timestamp_with_timezone() }}}}) >= (select max(cast({self.name_transformer.normalize_column_name(self.airbyte_normalized_at)} as {{{{ type_timestamp_with_timezone() }}}})) from {{{{ {scd_table} }}}}))"]""" template = Template( """ {{ '{{' }} config( From d270b92006a30cdedf4fd250f9991646565b655b Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 13 May 2022 09:26:10 -0700 Subject: [PATCH 03/43] update tests to reflect deleted rows --- .../simple_streams_second_run_row_counts.sql | 2 +- .../simple_streams_third_run_row_counts.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/simple_streams_second_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/simple_streams_second_run_row_counts.sql index ca5cdfa4fc40d..b5552676f0d75 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/simple_streams_second_run_row_counts.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/simple_streams_second_run_row_counts.sql @@ -24,7 +24,7 @@ union all select distinct 'dedup_cdc_excluded_scd' as label, count(*) as row_count, 9 as expected_count from {{ ref('dedup_cdc_excluded_scd') }} union all - select distinct 'dedup_cdc_excluded' as label, count(*) as row_count, 4 as expected_count + select distinct 
'dedup_cdc_excluded' as label, count(*) as row_count, 3 as expected_count from {{ ref('dedup_cdc_excluded') }} union all diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/simple_streams_third_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/simple_streams_third_run_row_counts.sql index cb886df680e97..ae3791053cc95 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/simple_streams_third_run_row_counts.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/simple_streams_third_run_row_counts.sql @@ -24,7 +24,7 @@ union all select distinct 'dedup_cdc_excluded_scd' as label, count(*) as row_count, 9 as expected_count from test_normalization.dedup_cdc_excluded_scd union all - select distinct 'dedup_cdc_excluded' as label, count(*) as row_count, 4 as expected_count + select distinct 'dedup_cdc_excluded' as label, count(*) as row_count, 3 as expected_count from test_normalization.dedup_cdc_excluded ) select * From ab5590a0e676a1df1ef6eae08ec7d5d1a3176b52 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 13 May 2022 10:06:42 -0700 Subject: [PATCH 04/43] revert final table generation --- .../transform_catalog/stream_processor.py | 48 +++---------------- 1 file changed, 7 insertions(+), 41 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 58b4ef404227e..6c66f141be588 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ 
b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -297,9 +297,10 @@ def process(self) -> List["StreamProcessor"]: unique_key=self.name_transformer.normalize_column_name(f"{self.airbyte_unique_key}_scd"), partition_by=PartitionScheme.ACTIVE_ROW, ) + where_clause = f"\nand {self.name_transformer.normalize_column_name('_airbyte_active_row')} = 1" # from_table should not use the de-duplicated final table or tables downstream (nested streams) will miss non active rows self.add_to_outputs( - self.generate_final_model(from_table, column_names, self.get_unique_key(), remove_duplicates_and_deletions=True), + self.generate_final_model(from_table, column_names, self.get_unique_key()) + where_clause, self.get_model_materialization_mode(is_intermediate=False, column_count=column_count), is_intermediate=False, unique_key=self.get_unique_key(), @@ -994,11 +995,9 @@ def get_primary_key_from_path(self, column_names: Dict[str, Tuple[str, str]], pa else: raise ValueError(f"No path specified for stream {self.stream_name}") - def generate_final_model( - self, from_table: str, column_names: Dict[str, Tuple[str, str]], unique_key: str = "", remove_duplicates_and_deletions=False - ) -> Any: - outer_query_source_table = jinja_call(from_table) - query = """ + def generate_final_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]], unique_key: str = "") -> Any: + template = Template( + """ -- Final base SQL model -- depends_on: {{ from_table }} select @@ -1015,41 +1014,11 @@ def generate_final_model( {{ col_emitted_at }}, {{ '{{ current_timestamp() }}' }} as {{ col_normalized_at }}, {{ hash_id }} -from {{ outer_query_source_table }} +from {{ from_table }} {{ sql_table_comment }} where 1 = 1 """ - if remove_duplicates_and_deletions: - # TODO better name for this table? 
- outer_query_source_table = "partitioned_data" - # TODO do this incrementally to avoid scanning the whole table - query = ( - """ -with partitioned_data as ( - select row_number() over ( - partition by _airbyte_unique_key - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ from_table }}.* - from {{ from_table }} -) - """ - + query - # Note that {{ '{{ type_timestamp_with_timezone() }}' }} resolves to a jinja macro, which is resolved by DBT to a SQL type - + """\nand ( -{{ normalized_active_row_column_name }} = 1 -or ( -cast({{ normalized_normalized_at_column_name }} as {{ '{{ type_timestamp_with_timezone() }}' }}) - >= - (select max(cast({{ normalized_normalized_at_column_name }} as {{ '{{ type_timestamp_with_timezone() }}' }})) from {{ from_table }}) -and -_airbyte_end_at is null -and -_airbyte_row_num = 1 -) -)""" - ) - template = Template(query) + ) sql = template.render( col_ab_id=self.get_ab_id(), col_emitted_at=self.get_emitted_at(), @@ -1060,9 +1029,6 @@ def generate_final_model( from_table=jinja_call(from_table), sql_table_comment=self.sql_table_comment(include_from_table=True), unique_key=unique_key, - normalized_active_row_column_name=self.name_transformer.normalize_column_name("_airbyte_active_row"), - normalized_normalized_at_column_name=self.name_transformer.normalize_column_name(self.airbyte_normalized_at), - outer_query_source_table=outer_query_source_table, ) return sql From 0be27760c5b066359122fcec8208f2ae6a653da5 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 13 May 2022 19:25:02 -0700 Subject: [PATCH 05/43] wip incremental delete --- .../macros/incremental.sql | 18 +++++----- .../transform_catalog/stream_processor.py | 34 ++++++++++++++++--- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/incremental.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/incremental.sql index 
f70b4798075c2..328c293c1dbb6 100644 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/incremental.sql +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/incremental.sql @@ -4,14 +4,14 @@ - incremental_clause controls the predicate to filter on new data to process incrementally #} -{% macro incremental_clause(col_emitted_at) -%} - {{ adapter.dispatch('incremental_clause')(col_emitted_at) }} +{% macro incremental_clause(col_emitted_at, tablename) -%} + {{ adapter.dispatch('incremental_clause')(col_emitted_at, tablename) }} {%- endmacro %} -{%- macro default__incremental_clause(col_emitted_at) -%} +{%- macro default__incremental_clause(col_emitted_at, tablename) -%} {% if is_incremental() %} and coalesce( - cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) >= (select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ this }}), + cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) >= (select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }}), {# -- if {{ col_emitted_at }} is NULL in either table, the previous comparison would evaluate to NULL, #} {# -- so we coalesce and make sure the row is always returned for incremental processing instead #} true) @@ -19,7 +19,7 @@ and coalesce( {%- endmacro -%} {# -- see https://on-systems.tech/113-beware-dbt-incremental-updates-against-snowflake-external-tables/ #} -{%- macro snowflake__incremental_clause(col_emitted_at) -%} +{%- macro snowflake__incremental_clause(col_emitted_at, tablename) -%} {% if is_incremental() %} {% if get_max_normalized_cursor(col_emitted_at) %} and cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) >= @@ -28,11 +28,11 @@ and cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) >= {% endif %} {%- endmacro -%} -{%- macro sqlserver__incremental_clause(col_emitted_at) -%} +{%- macro 
sqlserver__incremental_clause(col_emitted_at, tablename) -%} {% if is_incremental() %} -and ((select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ this }}) is null +and ((select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }}) is null or cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) >= - (select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ this }})) + (select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }})) {% endif %} {%- endmacro -%} @@ -40,7 +40,7 @@ and ((select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() {% if execute and is_incremental() %} {% if env_var('INCREMENTAL_CURSOR', 'UNSET') == 'UNSET' %} {% set query %} - select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ this }} + select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }} {% endset %} {% set max_cursor = run_query(query).columns[0][0] %} {% do return(max_cursor) %} diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 6c66f141be588..3a8bd6ee2f513 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -831,7 +831,7 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup from {{'{{'}} {{ from_table }} {{'}}'}} {{ sql_table_comment }} where 1 = 1 - {{'{{'}} incremental_clause({{ quoted_col_emitted_at }}) {{'}}'}} + {{'{{'}} incremental_clause({{ quoted_col_emitted_at }}, this) {{'}}'}} ), new_data_ids as ( -- build a subset of {{ unique_key }} from rows that are new @@ -1040,7 
+1040,7 @@ def add_incremental_clause(self, sql_query: str) -> Any: template = Template( """ {{ sql_query }} -{{'{{'}} incremental_clause({{ col_emitted_at }}) {{'}}'}} +{{'{{'}} incremental_clause({{ col_emitted_at }}, this) {{'}}'}} """ ) sql = template.render( @@ -1049,6 +1049,10 @@ def add_incremental_clause(self, sql_query: str) -> Any: ) return sql + # TODO use this method in add_incremental_clause (maybe not, since we need to configure the table name :/ ) + def get_incremental_clause(self, tablename: str) -> Any: + return "{{ incremental_clause(" + self.get_emitted_at(in_jinja=True) + ", " + tablename + ") }}" + @staticmethod def list_fields(column_names: Dict[str, Tuple[str, str]]) -> List[str]: return [column_names[field][0] for field in column_names] @@ -1101,9 +1105,29 @@ def add_to_outputs( sql = self.add_incremental_clause(sql) if do_deletions: # TODO figure out how to get SCD table name correctly - config[ - "post_hook" - ] = f"""["delete from {{{{ this }}}} where _airbyte_ab_id in ( select _airbyte_ab_id from {{{{ {scd_table} }}}} where _airbyte_active_row = 0 and cast({self.name_transformer.normalize_column_name(self.airbyte_normalized_at)} as {{{{ type_timestamp_with_timezone() }}}}) >= (select max(cast({self.name_transformer.normalize_column_name(self.airbyte_normalized_at)} as {{{{ type_timestamp_with_timezone() }}}})) from {{{{ {scd_table} }}}}))"]""" + deletion_hook = Template( + """ +with partitioned_data as ( + select row_number() over ( + partition by _airbyte_unique_key + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ scd_table }}.* + from {{ scd_table }} + where 1=1 + {{ incremental_clause }} +) +delete from {{ '{{ this }}' }} where _airbyte_unique_key in ( + select _airbyte_unique_key from partitioned_data + where _airbyte_active_row = 0 and _airbyte_row_num = 1 +) +""" + ).render( + scd_table="{{" + scd_table + "}}", + 
normalized_normalized_at_column_name=self.name_transformer.normalize_column_name(self.airbyte_normalized_at), + incremental_clause=self.get_incremental_clause(scd_table), + ) + config["post_hook"] = '["' + deletion_hook + '"]' template = Template( """ {{ '{{' }} config( From eb2f0eeba4c95297b5b07480c37d00569681fe8b Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Sun, 15 May 2022 10:30:35 -0700 Subject: [PATCH 06/43] refactor --- .../normalization/transform_catalog/stream_processor.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 3a8bd6ee2f513..2c080167dde8c 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -1040,16 +1040,12 @@ def add_incremental_clause(self, sql_query: str) -> Any: template = Template( """ {{ sql_query }} -{{'{{'}} incremental_clause({{ col_emitted_at }}, this) {{'}}'}} +{{ incremental_clause }} """ ) - sql = template.render( - sql_query=sql_query, - col_emitted_at=self.get_emitted_at(in_jinja=True), - ) + sql = template.render(sql_query=sql_query, incremental_clause=self.get_incremental_clause("this")) return sql - # TODO use this method in add_incremental_clause (maybe not, since we need to configure the table name :/ ) def get_incremental_clause(self, tablename: str) -> Any: return "{{ incremental_clause(" + self.get_emitted_at(in_jinja=True) + ", " + tablename + ") }}" From 448edf6673d3de6059902e113511efa2f6ea73d6 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Sun, 15 May 2022 10:32:09 -0700 Subject: [PATCH 07/43] remove todo --- .../normalization/transform_catalog/stream_processor.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 2c080167dde8c..a9c963494eadd 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -1100,7 +1100,6 @@ def add_to_outputs( # incremental is handled in the SCD SQL already sql = self.add_incremental_clause(sql) if do_deletions: - # TODO figure out how to get SCD table name correctly deletion_hook = Template( """ with partitioned_data as ( From 76dc17bac07f6691094a4114981f027c90abc2c7 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 17 May 2022 15:06:06 -0700 Subject: [PATCH 08/43] fix macros --- .../dbt-project-template/macros/incremental.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/incremental.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/incremental.sql index 328c293c1dbb6..86750a85ebcb3 100644 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/incremental.sql +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/incremental.sql @@ -21,9 +21,9 @@ and coalesce( {# -- see https://on-systems.tech/113-beware-dbt-incremental-updates-against-snowflake-external-tables/ #} {%- macro snowflake__incremental_clause(col_emitted_at, tablename) -%} {% if is_incremental() %} - {% if get_max_normalized_cursor(col_emitted_at) %} + {% if get_max_normalized_cursor(col_emitted_at, tablename) %} and cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) >= - cast('{{ get_max_normalized_cursor(col_emitted_at) }}' as {{ type_timestamp_with_timezone() }}) + cast('{{ get_max_normalized_cursor(col_emitted_at, tablename) 
}}' as {{ type_timestamp_with_timezone() }}) {% endif %} {% endif %} {%- endmacro -%} @@ -36,7 +36,7 @@ and ((select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() {% endif %} {%- endmacro -%} -{% macro get_max_normalized_cursor(col_emitted_at) %} +{% macro get_max_normalized_cursor(col_emitted_at, tablename) %} {% if execute and is_incremental() %} {% if env_var('INCREMENTAL_CURSOR', 'UNSET') == 'UNSET' %} {% set query %} From 4661e9e3410f861025f7062cc5bddd9399166c84 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Wed, 18 May 2022 15:28:57 -0700 Subject: [PATCH 09/43] use subquery for delete --- .../transform_catalog/stream_processor.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index a9c963494eadd..c4b150b6eb211 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -1102,18 +1102,17 @@ def add_to_outputs( if do_deletions: deletion_hook = Template( """ -with partitioned_data as ( - select row_number() over ( - partition by _airbyte_unique_key - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ scd_table }}.* - from {{ scd_table }} - where 1=1 - {{ incremental_clause }} -) delete from {{ '{{ this }}' }} where _airbyte_unique_key in ( - select _airbyte_unique_key from partitioned_data + select _airbyte_unique_key from ( + select row_number() over ( + partition by _airbyte_unique_key + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ scd_table }}.* + from {{ scd_table }} + where 1=1 + {{ incremental_clause }} + ) partitioned_data where _airbyte_active_row = 0 and _airbyte_row_num = 1 ) 
""" From ea597c8f04f7866ea5c1f472762928bdb582d49f Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Thu, 19 May 2022 09:51:52 -0700 Subject: [PATCH 10/43] wip new_data model --- .../transform_catalog/stream_processor.py | 60 ++++++++++++++----- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index c4b150b6eb211..2ddf89685834d 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -288,8 +288,16 @@ def process(self) -> List["StreamProcessor"]: is_intermediate=True, suffix="stg", ) + + scd_new_data_table = self.add_to_outputs( + self.generate_scd_new_data_model(from_table, column_names), + materialization_mode=forced_materialization_type, + is_intermediate=True, + suffix="scd_new_data", + ) + from_table = self.add_to_outputs( - self.generate_scd_type_2_model(from_table, column_names), + self.generate_scd_type_2_model(from_table, scd_new_data_table, column_names), self.get_model_materialization_mode(is_intermediate=False, column_count=column_count), is_intermediate=False, suffix="scd", @@ -300,7 +308,8 @@ def process(self) -> List["StreamProcessor"]: where_clause = f"\nand {self.name_transformer.normalize_column_name('_airbyte_active_row')} = 1" # from_table should not use the de-duplicated final table or tables downstream (nested streams) will miss non active rows self.add_to_outputs( - self.generate_final_model(from_table, column_names, self.get_unique_key()) + where_clause, + self.generate_final_model(from_table, column_names, scd_new_data_table=scd_new_data_table, unique_key=self.get_unique_key()) + + where_clause, self.get_model_materialization_mode(is_intermediate=False, 
column_count=column_count), is_intermediate=False, unique_key=self.get_unique_key(), @@ -669,7 +678,31 @@ def safe_cast_to_string(definition: Dict, column_name: str, destination_type: De return col - def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]]) -> Any: + def generate_scd_new_data_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]]) -> Any: + jinja_variables = { + "from_table": from_table, + "quoted_col_emitted_at": self.get_emitted_at(in_jinja=True), + "sql_table_comment": self.sql_table_comment(include_from_table=True), + } + sql = Template( + """ +-- depends_on: {{ from_table }} +{{ '{% if is_incremental() %}' }} +-- retrieve incremental "new" data +select + * +from {{'{{'}} {{ from_table }} {{'}}'}} +{{ sql_table_comment }} +where 1 = 1 +{{'{{'}} incremental_clause({{ quoted_col_emitted_at }}, this) {{'}}'}} +{{ '{% else %}' }} +select * from {{'{{'}} {{ from_table }} {{'}}'}} where 1 = 0 -- create an empty table just to get the columns +{{ '{% endif %}' }} +""" + ).render(jinja_variables) + return sql + + def generate_scd_type_2_model(self, from_table: str, new_data_table: str, column_names: Dict[str, Tuple[str, str]]) -> Any: cursor_field = self.get_cursor_field(column_names) order_null = f"is null asc,\n {cursor_field} desc" if self.destination_type.value == DestinationType.ORACLE.value: @@ -764,6 +797,7 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup "input_data_table": input_data_table, "lag_begin": lag_begin, "lag_end": lag_end, + "new_data_table": new_data_table, "order_null": order_null, "parent_hash_id": self.parent_hash_id(), "primary_key_partition": self.get_primary_key_partition(column_names), @@ -822,17 +856,9 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup sql = Template( """ -- depends_on: {{ from_table }} +-- depends on: {{ '{{' }} {{ new_data_table }} {{ '}}' }} with {{ '{% if is_incremental() %}' }} 
-new_data as ( - -- retrieve incremental "new" data - select - * - from {{'{{'}} {{ from_table }} {{'}}'}} - {{ sql_table_comment }} - where 1 = 1 - {{'{{'}} incremental_clause({{ quoted_col_emitted_at }}, this) {{'}}'}} -), new_data_ids as ( -- build a subset of {{ unique_key }} from rows that are new select distinct @@ -841,11 +867,11 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup {{ primary_key }}, {%- endfor %} ]) {{ '}}' }} as {{ unique_key }} - from new_data + from {{ '{{' }} {{ new_data_table }} {{ '}}' }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ '{{' }} {{ new_data_table }} {{ '}}' }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -859,7 +885,7 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup where {{ active_row }} = 1 ), input_data as ( - select {{ '{{' }} dbt_utils.star({{ from_table }}) {{ '}}' }} from new_data + select {{ '{{' }} dbt_utils.star({{ from_table }}) {{ '}}' }} from {{ '{{' }} {{ new_data_table }} {{ '}}' }} union all select {{ '{{' }} dbt_utils.star({{ from_table }}) {{ '}}' }} from previous_active_scd_data ), @@ -995,7 +1021,9 @@ def get_primary_key_from_path(self, column_names: Dict[str, Tuple[str, str]], pa else: raise ValueError(f"No path specified for stream {self.stream_name}") - def generate_final_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]], unique_key: str = "") -> Any: + def generate_final_model( + self, from_table: str, column_names: Dict[str, Tuple[str, str]], scd_new_data_table: str = None, unique_key: str = "" + ) -> Any: template = Template( """ -- Final base SQL model From 854a98b6d05a2686afee5650476992af1289df1e Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 20 May 2022 11:19:27 -0700 Subject: [PATCH 11/43] wip --- 
.../transform_catalog/stream_processor.py | 34 +++++++++++++++---- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 2ddf89685834d..080bc26e51fc3 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -308,6 +308,7 @@ def process(self) -> List["StreamProcessor"]: where_clause = f"\nand {self.name_transformer.normalize_column_name('_airbyte_active_row')} = 1" # from_table should not use the de-duplicated final table or tables downstream (nested streams) will miss non active rows self.add_to_outputs( + # TODO scd_new_data_table needs to be passed into add_to_outputs, not generate_final_model self.generate_final_model(from_table, column_names, scd_new_data_table=scd_new_data_table, unique_key=self.get_unique_key()) + where_clause, self.get_model_materialization_mode(is_intermediate=False, column_count=column_count), @@ -696,7 +697,7 @@ def generate_scd_new_data_model(self, from_table: str, column_names: Dict[str, T where 1 = 1 {{'{{'}} incremental_clause({{ quoted_col_emitted_at }}, this) {{'}}'}} {{ '{% else %}' }} -select * from {{'{{'}} {{ from_table }} {{'}}'}} where 1 = 0 -- create an empty table just to get the columns +select * from {{'{{'}} {{ from_table }} {{'}}'}} {{ '{% endif %}' }} """ ).render(jinja_variables) @@ -855,7 +856,6 @@ def generate_scd_type_2_model(self, from_table: str, new_data_table: str, column jinja_variables["scd_columns_sql"] = scd_columns_sql sql = Template( """ --- depends_on: {{ from_table }} -- depends on: {{ '{{' }} {{ new_data_table }} {{ '}}' }} with {{ '{% if is_incremental() %}' }} @@ -1115,15 +1115,35 @@ def add_to_outputs( stg_table = 
self.tables_registry.get_file_name(schema, self.json_path, self.stream_name, "stg", truncate_name) if self.name_transformer.needs_quotes(stg_table): stg_table = jinja_call(self.name_transformer.apply_quote(stg_table)) + + hooks = [] + + # Drop rows from the final table which need to be updated + # TODO does the final table always get populated into tables_registry before the SCD table? + final_table_name = self.tables_registry.get_file_name(schema, self.json_path, self.stream_name, "", truncate_name) + hashid_column_name = self.hash_id(False) + hooks.append( + f""" + {{% if adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='{final_table_name}') is not none %}} + delete from {{{{ this.schema }}}}.{jinja_call(self.name_transformer.apply_quote(final_table_name))} where {hashid_column_name} in (select {hashid_column_name} from {stg_schema}.{stg_table}) + {{% endif %}} + """ + ) + if self.destination_type.value == DestinationType.POSTGRES.value: # Keep only rows with the max emitted_at to keep incremental behavior - config["post_hook"] = ( - f'["delete from {stg_schema}.{stg_table} ' - + f"where {self.airbyte_emitted_at} != (select max({self.airbyte_emitted_at}) " - + f'from {stg_schema}.{stg_table})"]' + hooks.append( + f"delete from {stg_schema}.{stg_table} where {self.airbyte_emitted_at} != (select max({self.airbyte_emitted_at}) from {stg_schema}.{stg_table})", ) else: - config["post_hook"] = f'["drop view {stg_schema}.{stg_table}"]' + hooks.append( + f"drop view {stg_schema}.{stg_table}", + ) + + config["post_hook"] = "[" + ",".join(map(lambda hook: '"' + hook + '"', hooks)) + "]" else: # incremental is handled in the SCD SQL already sql = self.add_incremental_clause(sql) From 36aa68b434d6470521456a11aff47a55d110f0f3 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 20 May 2022 19:53:15 -0700 Subject: [PATCH 12/43] wip move delete hook to scd --- .../transform_catalog/stream_processor.py | 94 ++++++++++--------- 1 file changed, 50 
insertions(+), 44 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 080bc26e51fc3..411e276e03e4c 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -304,6 +304,8 @@ def process(self) -> List["StreamProcessor"]: subdir="scd", unique_key=self.name_transformer.normalize_column_name(f"{self.airbyte_unique_key}_scd"), partition_by=PartitionScheme.ACTIVE_ROW, + do_deletions=True, + column_names=column_names, ) where_clause = f"\nand {self.name_transformer.normalize_column_name('_airbyte_active_row')} = 1" # from_table should not use the de-duplicated final table or tables downstream (nested streams) will miss non active rows @@ -315,8 +317,8 @@ def process(self) -> List["StreamProcessor"]: is_intermediate=False, unique_key=self.get_unique_key(), partition_by=PartitionScheme.UNIQUE_KEY, - do_deletions=True, scd_table=from_table, + column_names=column_names, ) return self.find_children_streams(from_table, column_names) @@ -1092,6 +1094,7 @@ def add_to_outputs( partition_by: PartitionScheme = PartitionScheme.DEFAULT, do_deletions: bool = False, scd_table: str = "", + column_names: Dict[str, Tuple[str, str]] = {}, ) -> str: schema = self.get_schema(is_intermediate) # MySQL table names need to be manually truncated, because it does not do it automatically @@ -1110,28 +1113,55 @@ def add_to_outputs( else: config["schema"] = f'"{schema}"' if self.is_incremental_mode(self.destination_sync_mode): + stg_schema = self.get_schema(True) + stg_table = self.tables_registry.get_file_name(schema, self.json_path, self.stream_name, "stg", truncate_name) + if self.name_transformer.needs_quotes(stg_table): + stg_table = 
jinja_call(self.name_transformer.apply_quote(stg_table)) if suffix == "scd": - stg_schema = self.get_schema(True) - stg_table = self.tables_registry.get_file_name(schema, self.json_path, self.stream_name, "stg", truncate_name) - if self.name_transformer.needs_quotes(stg_table): - stg_table = jinja_call(self.name_transformer.apply_quote(stg_table)) - hooks = [] - # Drop rows from the final table which need to be updated - # TODO does the final table always get populated into tables_registry before the SCD table? - final_table_name = self.tables_registry.get_file_name(schema, self.json_path, self.stream_name, "", truncate_name) - hashid_column_name = self.hash_id(False) - hooks.append( - f""" - {{% if adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='{final_table_name}') is not none %}} - delete from {{{{ this.schema }}}}.{jinja_call(self.name_transformer.apply_quote(final_table_name))} where {hashid_column_name} in (select {hashid_column_name} from {stg_schema}.{stg_table}) - {{% endif %}} - """ - ) + # This delete query depends on the _stg model, so run it before we drop/update the _stg view + if do_deletions: + final_table_name = self.tables_registry.get_file_name(schema, self.json_path, self.stream_name, "", truncate_name) + # final_table_name = self.final_table_name + deletion_hook = Template( + """ + {{ '{%' }} + if adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='{{ final_table_name }}' + ) is not none + {{ '%}' }} + + delete from {{ '{{ this.schema }}' }}.{{ quoted_final_table_name }} where {{ unique_key }} in ( + select + {{ '{{' }} dbt_utils.surrogate_key([ + {%- for primary_key in primary_keys %} + {{ primary_key }}, + {%- endfor %} + ]) {{ '}}' }} as {{ unique_key }} + from {{stg_schema}}.{{stg_table}} + where 1=1 {{ incremental_clause }} + ) + + {{ '{% else %}' }} + -- If the table doesn't exist, then we shouldn't try to delete it. 
+ -- We have to have a non-empty query, so just do a simple select here. + select 1 + {{ '{% endif %}' }} + """ + ).render( + final_table_name=final_table_name, + quoted_final_table_name=jinja_call(self.name_transformer.apply_quote(final_table_name)), + unique_key=self.get_unique_key(), + primary_keys=self.list_primary_keys(column_names), + stg_schema=stg_schema, + stg_table=stg_table, + # TODO should this be using the final table? + incremental_clause=self.get_incremental_clause("this"), + ) + hooks.append(deletion_hook) if self.destination_type.value == DestinationType.POSTGRES.value: # Keep only rows with the max emitted_at to keep incremental behavior @@ -1142,34 +1172,10 @@ def add_to_outputs( hooks.append( f"drop view {stg_schema}.{stg_table}", ) - config["post_hook"] = "[" + ",".join(map(lambda hook: '"' + hook + '"', hooks)) + "]" else: # incremental is handled in the SCD SQL already sql = self.add_incremental_clause(sql) - if do_deletions: - deletion_hook = Template( - """ -delete from {{ '{{ this }}' }} where _airbyte_unique_key in ( - select _airbyte_unique_key from ( - select row_number() over ( - partition by _airbyte_unique_key - order by _airbyte_active_row desc, _airbyte_ab_id - ) as _airbyte_row_num, - {{ scd_table }}.* - from {{ scd_table }} - where 1=1 - {{ incremental_clause }} - ) partitioned_data - where _airbyte_active_row = 0 and _airbyte_row_num = 1 -) -""" - ).render( - scd_table="{{" + scd_table + "}}", - normalized_normalized_at_column_name=self.name_transformer.normalize_column_name(self.airbyte_normalized_at), - incremental_clause=self.get_incremental_clause(scd_table), - ) - config["post_hook"] = '["' + deletion_hook + '"]' template = Template( """ {{ '{{' }} config( From 26f5668683edf65e5ac7b8032d1d33996844fb12 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 24 May 2022 11:08:20 -0700 Subject: [PATCH 13/43] drop new_data model; clean up code --- .../transform_catalog/stream_processor.py | 27 ++++++++++--------- 1 file changed, 
14 insertions(+), 13 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 411e276e03e4c..e88e9333a65c3 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -290,7 +290,7 @@ def process(self) -> List["StreamProcessor"]: ) scd_new_data_table = self.add_to_outputs( - self.generate_scd_new_data_model(from_table, column_names), + self.generate_scd_new_data_model(from_table), materialization_mode=forced_materialization_type, is_intermediate=True, suffix="scd_new_data", @@ -306,18 +306,16 @@ def process(self) -> List["StreamProcessor"]: partition_by=PartitionScheme.ACTIVE_ROW, do_deletions=True, column_names=column_names, + scd_new_data_table=scd_new_data_table, ) where_clause = f"\nand {self.name_transformer.normalize_column_name('_airbyte_active_row')} = 1" # from_table should not use the de-duplicated final table or tables downstream (nested streams) will miss non active rows self.add_to_outputs( - # TODO scd_new_data_table needs to be passed into add_to_outputs, not generate_final_model - self.generate_final_model(from_table, column_names, scd_new_data_table=scd_new_data_table, unique_key=self.get_unique_key()) - + where_clause, + self.generate_final_model(from_table, column_names, unique_key=self.get_unique_key()) + where_clause, self.get_model_materialization_mode(is_intermediate=False, column_count=column_count), is_intermediate=False, unique_key=self.get_unique_key(), partition_by=PartitionScheme.UNIQUE_KEY, - scd_table=from_table, column_names=column_names, ) return self.find_children_streams(from_table, column_names) @@ -681,7 +679,7 @@ def safe_cast_to_string(definition: Dict, column_name: str, destination_type: De return col 
- def generate_scd_new_data_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]]) -> Any: + def generate_scd_new_data_model(self, from_table: str) -> Any: jinja_variables = { "from_table": from_table, "quoted_col_emitted_at": self.get_emitted_at(in_jinja=True), @@ -1023,9 +1021,7 @@ def get_primary_key_from_path(self, column_names: Dict[str, Tuple[str, str]], pa else: raise ValueError(f"No path specified for stream {self.stream_name}") - def generate_final_model( - self, from_table: str, column_names: Dict[str, Tuple[str, str]], scd_new_data_table: str = None, unique_key: str = "" - ) -> Any: + def generate_final_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]], unique_key: str = "") -> Any: template = Template( """ -- Final base SQL model @@ -1093,8 +1089,8 @@ def add_to_outputs( subdir: str = "", partition_by: PartitionScheme = PartitionScheme.DEFAULT, do_deletions: bool = False, - scd_table: str = "", column_names: Dict[str, Tuple[str, str]] = {}, + scd_new_data_table: str = "", ) -> str: schema = self.get_schema(is_intermediate) # MySQL table names need to be manually truncated, because it does not do it automatically @@ -1166,12 +1162,17 @@ def add_to_outputs( if self.destination_type.value == DestinationType.POSTGRES.value: # Keep only rows with the max emitted_at to keep incremental behavior hooks.append( - f"delete from {stg_schema}.{stg_table} where {self.airbyte_emitted_at} != (select max({self.airbyte_emitted_at}) from {stg_schema}.{stg_table})", + f"delete from {{{{ {scd_new_data_table} }}}} where {self.airbyte_emitted_at} != (select max({self.airbyte_emitted_at}) from {{{{ {scd_new_data_table} }}}})", ) - else: hooks.append( - f"drop view {stg_schema}.{stg_table}", + f"delete from {stg_schema}.{stg_table} where {self.airbyte_emitted_at} != (select max({self.airbyte_emitted_at}) from {stg_schema}.{stg_table})", ) + else: + # Note the different macro styles: + # scd_new_data_table is a DBT ref() macro, so we wrap it 
in another {{ ... }} so that DBT will resolve it + # stg_schema+stg_table are plain strings, so they need to be rendered as plain strings + hooks.append(f"drop view {{{{ {scd_new_data_table} }}}}") + hooks.append(f"drop view {stg_schema}.{stg_table}") config["post_hook"] = "[" + ",".join(map(lambda hook: '"' + hook + '"', hooks)) + "]" else: # incremental is handled in the SCD SQL already From 2976e5c9a3c6f4fc247336675eea74d623c19275 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 24 May 2022 20:24:13 -0700 Subject: [PATCH 14/43] wip better delete logic (need to verify performance) --- .../transform_catalog/stream_processor.py | 40 ++++++++++++++----- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index e88e9333a65c3..9f97643b54c72 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -1119,7 +1119,7 @@ def add_to_outputs( # This delete query depends on the _stg model, so run it before we drop/update the _stg view if do_deletions: final_table_name = self.tables_registry.get_file_name(schema, self.json_path, self.stream_name, "", truncate_name) - # final_table_name = self.final_table_name + active_row_column_name = self.name_transformer.normalize_column_name("_airbyte_active_row") deletion_hook = Template( """ {{ '{%' }} @@ -1130,15 +1130,29 @@ def add_to_outputs( ) is not none {{ '%}' }} + -- Delete records which are no longer active. 
+ -- Find the records which are being updated by querying the _scd_new_data model + -- Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 delete from {{ '{{ this.schema }}' }}.{{ quoted_final_table_name }} where {{ unique_key }} in ( - select - {{ '{{' }} dbt_utils.surrogate_key([ - {%- for primary_key in primary_keys %} - {{ primary_key }}, - {%- endfor %} - ]) {{ '}}' }} as {{ unique_key }} - from {{stg_schema}}.{{stg_table}} - where 1=1 {{ incremental_clause }} + with modified_ids as ( + select + {{ '{{' }} dbt_utils.surrogate_key([ + {%- for primary_key in primary_keys %} + {{ primary_key }}, + {%- endfor %} + ]) {{ '}}' }} as {{ unique_key }} + from {{ quoted_scd_new_data_table }} + where 1=1 + {{ incremental_clause }} + ) + select modified_ids.{{ unique_key }} + from ( + select * from {{ '{{ this }}' }} + where {{ active_row_column_name }} = 1 + ) scd_active_rows + right outer join modified_ids on modified_ids.{{ unique_key }} = scd_active_rows.{{ unique_key }} + group by modified_ids.{{ unique_key }} + having count(scd_active_rows.{{ unique_key }}) = 0 ) {{ '{% else %}' }} @@ -1154,8 +1168,12 @@ def add_to_outputs( primary_keys=self.list_primary_keys(column_names), stg_schema=stg_schema, stg_table=stg_table, - # TODO should this be using the final table? - incremental_clause=self.get_incremental_clause("this"), + incremental_clause=self.get_incremental_clause( + "this.schema + '.' 
+ " + self.name_transformer.apply_quote(final_table_name) + ), + scd_new_data_table=scd_new_data_table, + quoted_scd_new_data_table=jinja_call(scd_new_data_table), + active_row_column_name=active_row_column_name, ) hooks.append(deletion_hook) From 883f4d6091034b43901b5c96b366d4cda0614a39 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 27 May 2022 08:07:07 -0700 Subject: [PATCH 15/43] better delete logic+comments --- .../transform_catalog/stream_processor.py | 38 +++++++++++++------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 9f97643b54c72..d969f6615e7b7 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -1123,17 +1123,31 @@ def add_to_outputs( deletion_hook = Template( """ {{ '{%' }} - if adapter.get_relation( + set final_table_relation = adapter.get_relation( database=this.database, schema=this.schema, identifier='{{ final_table_name }}' - ) is not none + ) + {{ '%}' }} + {{ '{#' }} + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + {{ '#}' }} + {{ '{%' }} + if final_table_relation is not none and '{{ unique_key }}' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') {{ '%}' }} - -- Delete records which are no longer active. 
- -- Find the records which are being updated by querying the _scd_new_data model - -- Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - delete from {{ '{{ this.schema }}' }}.{{ quoted_final_table_name }} where {{ unique_key }} in ( + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). 
+ delete from {{ '{{ this.schema }}' }}.{{ quoted_final_table_name }} + where {{ unique_key }} in ( with modified_ids as ( select {{ '{{' }} dbt_utils.surrogate_key([ @@ -1144,19 +1158,19 @@ def add_to_outputs( from {{ quoted_scd_new_data_table }} where 1=1 {{ incremental_clause }} - ) - select modified_ids.{{ unique_key }} - from ( - select * from {{ '{{ this }}' }} + ), + scd_active_rows as ( + select scd_table.* from {{ '{{ this }}' }} scd_table + inner join modified_ids on scd_table.{{ unique_key }} = modified_ids.{{ unique_key }} where {{ active_row_column_name }} = 1 - ) scd_active_rows + ) + select modified_ids.{{ unique_key }} from scd_active_rows right outer join modified_ids on modified_ids.{{ unique_key }} = scd_active_rows.{{ unique_key }} group by modified_ids.{{ unique_key }} having count(scd_active_rows.{{ unique_key }}) = 0 ) {{ '{% else %}' }} - -- If the table doesn't exist, then we shouldn't try to delete it. -- We have to have a non-empty query, so just do a simple select here. 
select 1 {{ '{% endif %}' }} From bdfd5ab59f71d11795095c18377bed59bb3ec1e6 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 27 May 2022 08:07:58 -0700 Subject: [PATCH 16/43] add record to test for edge case --- .../test_simple_streams/data_input/messages_incremental.txt | 1 + .../simple_streams_second_run_row_counts.sql | 6 +++--- .../simple_streams_third_run_row_counts.sql | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_incremental.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_incremental.txt index 0f4a6ee16d5eb..3e239abccfc5b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_incremental.txt +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_incremental.txt @@ -14,6 +14,7 @@ {"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":5,"name":"vw","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623849314663,"_ab_cdc_lsn":26975264,"_ab_cdc_deleted_at":null},"emitted_at":1623860160}} {"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":5,"name":null,"column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623900000000,"_ab_cdc_lsn":28010252,"_ab_cdc_deleted_at":1623900000000},"emitted_at":1623900000000}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":8,"name":"ford","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1624000000000,"_ab_cdc_lsn":29010252,"_ab_cdc_deleted_at":null},"emitted_at":1624000000000}} {"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":1,"name":"mazda","_ab_cdc_updated_at":1623849130530,"_ab_cdc_lsn":26971624,"_ab_cdc_log_pos": 33274,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} 
{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":2,"name":"toyata","_ab_cdc_updated_at":1623849130549,"_ab_cdc_lsn":26971624,"_ab_cdc_log_pos": 33275,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/simple_streams_second_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/simple_streams_second_run_row_counts.sql index b5552676f0d75..8a6a3bd7486de 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/simple_streams_second_run_row_counts.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/simple_streams_second_run_row_counts.sql @@ -18,13 +18,13 @@ union all union all - select distinct '_airbyte_raw_dedup_cdc_excluded' as label, count(*) as row_count, 2 as expected_count + select distinct '_airbyte_raw_dedup_cdc_excluded' as label, count(*) as row_count, 3 as expected_count from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} union all - select distinct 'dedup_cdc_excluded_scd' as label, count(*) as row_count, 9 as expected_count + select distinct 'dedup_cdc_excluded_scd' as label, count(*) as row_count, 10 as expected_count from {{ ref('dedup_cdc_excluded_scd') }} union all - select distinct 'dedup_cdc_excluded' as label, count(*) as row_count, 3 as expected_count + select distinct 'dedup_cdc_excluded' as label, count(*) as row_count, 4 as expected_count from {{ ref('dedup_cdc_excluded') }} union all diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/simple_streams_third_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/simple_streams_third_run_row_counts.sql index ae3791053cc95..dadeb6026f62e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/simple_streams_third_run_row_counts.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/simple_streams_third_run_row_counts.sql @@ -18,13 +18,13 @@ union all union all - select distinct '_airbyte_raw_dedup_cdc_excluded' as label, count(*) as row_count, 2 as expected_count + select distinct '_airbyte_raw_dedup_cdc_excluded' as label, count(*) as row_count, 3 as expected_count from test_normalization._airbyte_raw_dedup_cdc_excluded union all - select distinct 'dedup_cdc_excluded_scd' as label, count(*) as row_count, 9 as expected_count + select distinct 'dedup_cdc_excluded_scd' as label, count(*) as row_count, 10 as expected_count from test_normalization.dedup_cdc_excluded_scd union all - select distinct 'dedup_cdc_excluded' as label, count(*) as row_count, 3 as expected_count + select distinct 'dedup_cdc_excluded' as label, count(*) as row_count, 4 as expected_count from test_normalization.dedup_cdc_excluded ) select * From ee3fe8fc8081f36f9328ea8056becd8c42b13d5e Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 27 May 2022 15:26:20 -0700 Subject: [PATCH 17/43] slight tweaks --- .../transform_catalog/stream_processor.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py 
b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index a17f93933250f..c13d5a3b389ad 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -1174,8 +1174,8 @@ def add_to_outputs( -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - delete from {{ '{{ this.schema }}' }}.{{ quoted_final_table_name }} - where {{ unique_key }} in ( + delete from {{ '{{ this.schema }}' }}.{{ quoted_final_table_name }} final_table + where final_table.{{ unique_key }} in ( with modified_ids as ( select {{ '{{' }} dbt_utils.surrogate_key([ @@ -1199,14 +1199,14 @@ def add_to_outputs( ) {{ '{% else %}' }} - -- We have to have a non-empty query, so just do a simple select here. 
- select 1 + -- We have to have a non-empty query, so just do a noop delete + delete from {{ '{{ this }}' }} where 1=0 {{ '{% endif %}' }} """ ).render( final_table_name=final_table_name, quoted_final_table_name=jinja_call(self.name_transformer.apply_quote(final_table_name)), - unique_key=self.get_unique_key(), + unique_key=self.get_unique_key(in_jinja=False), primary_keys=self.list_primary_keys(column_names), stg_schema=stg_schema, stg_table=stg_table, From 578dd2f69d8e60f4297f58d2c40184a05db553e9 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 27 May 2022 19:14:35 -0700 Subject: [PATCH 18/43] better codegen --- .../normalization/transform_catalog/stream_processor.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index c13d5a3b389ad..58d786131dd41 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -1163,7 +1163,7 @@ def add_to_outputs( So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) {{ '#}' }} {{ '{%' }} - if final_table_relation is not none and '{{ unique_key }}' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + if final_table_relation is not none and {{ quoted_unique_key }} in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') {{ '%}' }} -- Delete records which are no longer active: @@ -1174,7 +1174,7 @@ def add_to_outputs( -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. 
-- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - delete from {{ '{{ this.schema }}' }}.{{ quoted_final_table_name }} final_table + delete from {{ '{{ final_table_relation }}' }} final_table where final_table.{{ unique_key }} in ( with modified_ids as ( select @@ -1207,6 +1207,7 @@ def add_to_outputs( final_table_name=final_table_name, quoted_final_table_name=jinja_call(self.name_transformer.apply_quote(final_table_name)), unique_key=self.get_unique_key(in_jinja=False), + quoted_unique_key=self.get_unique_key(in_jinja=True), primary_keys=self.list_primary_keys(column_names), stg_schema=stg_schema, stg_table=stg_table, From 5a082951ff6df100ce736290c0cf3a5eb36bb414 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 31 May 2022 11:39:54 -0700 Subject: [PATCH 19/43] redshift does not support ctid in delete --- .../normalization/transform_catalog/stream_processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 58d786131dd41..f6a71548f0311 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -1174,8 +1174,8 @@ def add_to_outputs( -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). 
- delete from {{ '{{ final_table_relation }}' }} final_table - where final_table.{{ unique_key }} in ( + delete from {{ '{{ final_table_relation }}' }} + where {{ '{{ final_table_relation }}' }}.{{ unique_key }} in ( with modified_ids as ( select {{ '{{' }} dbt_utils.surrogate_key([ From 8239f1160eaee6c404fb8068699bfa50629dd241 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 31 May 2022 14:26:35 -0700 Subject: [PATCH 20/43] clickhouse deletes --- .../transform_catalog/stream_processor.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index f6a71548f0311..89e729eb11c20 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -1148,6 +1148,12 @@ def add_to_outputs( if do_deletions: final_table_name = self.tables_registry.get_file_name(schema, self.json_path, self.stream_name, "", truncate_name) active_row_column_name = self.name_transformer.normalize_column_name("_airbyte_active_row") + if self.destination_type == DestinationType.CLICKHOUSE: + delete_statement = "alter table {{ final_table_relation }} delete" + noop_delete_statement = "alter table {{ this }} delete where 1=0" + else: + delete_statement = "delete from {{ final_table_relation }}" + noop_delete_statement = "delete from {{ this }} where 1=0" deletion_hook = Template( """ {{ '{%' }} @@ -1174,7 +1180,7 @@ def add_to_outputs( -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. 
Snowflake). - delete from {{ '{{ final_table_relation }}' }} + {{ delete_statement }} where {{ '{{ final_table_relation }}' }}.{{ unique_key }} in ( with modified_ids as ( select @@ -1200,10 +1206,12 @@ def add_to_outputs( {{ '{% else %}' }} -- We have to have a non-empty query, so just do a noop delete - delete from {{ '{{ this }}' }} where 1=0 + {{ noop_delete_statement }} {{ '{% endif %}' }} """ ).render( + delete_statement=delete_statement, + noop_delete_statement=noop_delete_statement, final_table_name=final_table_name, quoted_final_table_name=jinja_call(self.name_transformer.apply_quote(final_table_name)), unique_key=self.get_unique_key(in_jinja=False), From 8cba970932945118b52531e58f6ea1da19dd10f5 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 31 May 2022 20:17:35 -0700 Subject: [PATCH 21/43] regenerate most test outputs (missing mssql + clickhouse) --- .../test_nested_streams/dbt_project.yml | 132 ++++++++++++----- ..._columns_resulting_into_long_names_scd.sql | 2 +- ...resulting_into_long_names_scd_new_data.sql | 11 ++ ..._columns_resulting_into_long_names_ab1.sql | 2 +- ..._columns_resulting_into_long_names_ab2.sql | 2 +- ...ing_into_long_names_partition_DATA_ab1.sql | 2 +- ...esulting_into_long_names_partition_ab1.sql | 2 +- ..._names_partition_double_array_data_ab1.sql | 2 +- ..._columns_resulting_into_long_names_scd.sql | 67 +++++++-- ...plex_columns_resulting_into_long_names.sql | 2 +- ...ns_resulting_into_long_names_partition.sql | 2 +- ...sulting_into_long_names_partition_DATA.sql | 2 +- ...long_names_partition_double_array_data.sql | 2 +- ...resulting_into_long_names_scd_new_data.sql | 21 +++ ...resulting_into_long_names_scd_new_data.sql | 11 ++ .../test_simple_streams/dbt_project.yml | 80 ++++++----- .../test_simple_streams/first_dbt_project.yml | 104 +++++++++----- .../dedup_exchange_rate_scd.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 11 ++ .../dedup_exchange_rate_ab1.sql | 2 +- .../dedup_exchange_rate_ab2.sql | 2 +- 
.../dedup_exchange_rate_scd.sql | 69 +++++++-- .../dedup_exchange_rate.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 21 +++ .../dedup_exchange_rate_stg.sql | 2 +- .../dedup_exchange_rate_ab1.sql | 2 +- .../dedup_exchange_rate_ab2.sql | 2 +- .../dedup_exchange_rate_scd.sql | 69 +++++++-- .../dedup_exchange_rate.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 21 +++ .../dedup_exchange_rate_stg.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 11 ++ .../dedup_exchange_rate_scd_new_data.sql | 11 ++ .../mysql/test_nested_streams/dbt_project.yml | 132 +++++++++++------ ..._stream_with_co_1g_into_long_names_scd.sql | 2 +- ..._stream_with_co_1ng_names_scd_new_data.sql | 9 ++ ..._stream_with_co_1g_into_long_names_ab1.sql | 2 +- ..._stream_with_co_1g_into_long_names_ab2.sql | 2 +- ..._stream_with_co_2g_names_partition_ab1.sql | 2 +- ..._stream_with_co_3double_array_data_ab1.sql | 2 +- ..._stream_with_co_3es_partition_data_ab1.sql | 2 +- ..._stream_with_co_1g_into_long_names_scd.sql | 67 +++++++-- ..._stream_with_co___long_names_partition.sql | 2 +- ..._stream_with_co___names_partition_data.sql | 2 +- ..._stream_with_co__ion_double_array_data.sql | 2 +- ..._stream_with_co__lting_into_long_names.sql | 2 +- ..._stream_with_co_1ng_names_scd_new_data.sql | 19 +++ ..._stream_with_co_1g_into_long_names_scd.sql | 2 +- ..._stream_with_co_1ng_names_scd_new_data.sql | 9 ++ .../mysql/test_simple_streams/dbt_project.yml | 104 ++++++++------ .../dedup_exchange_rate_scd.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 9 ++ .../dedup_exchange_rate_ab1.sql | 2 +- .../dedup_exchange_rate_ab2.sql | 2 +- .../dedup_exchange_rate_scd.sql | 69 +++++++-- .../dedup_exchange_rate.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 19 +++ .../dedup_exchange_rate_stg.sql | 2 +- .../dedup_exchange_rate_scd.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 9 ++ .../test_simple_streams/dbt_project.yml | 102 ++++++++----- .../dedup_exchange_rate_scd.sql | 2 +- 
.../dedup_exchange_rate_scd_new_data.sql | 9 ++ .../dedup_exchange_rate_ab1.sql | 2 +- .../dedup_exchange_rate_ab2.sql | 2 +- .../dedup_exchange_rate_scd.sql | 69 +++++++-- .../dedup_exchange_rate.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 19 +++ .../dedup_exchange_rate_stg.sql | 2 +- .../dedup_exchange_rate_scd.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 9 ++ .../test_nested_streams/dbt_project.yml | 132 ++++++++++++----- ...ream_with_c__lting_into_long_names_scd.sql | 18 +-- .../some_stream_that_was_empty_scd.sql | 18 +-- ...d_stream_with_c___long_names_partition.sql | 10 +- ...d_stream_with_c___names_partition_data.sql | 12 +- ...d_stream_with_c__ion_double_array_data.sql | 12 +- ..._c__lting_into_long_names_scd_new_data.sql | 13 ++ ...ream_with_c__lting_into_long_names_stg.sql | 18 +-- ...ome_stream_that_was_empty_scd_new_data.sql | 13 ++ .../some_stream_that_was_empty_stg.sql | 16 +-- ...e_stream_with_n__lting_into_long_names.sql | 16 +-- .../conflict_stream_array.sql | 12 +- .../conflict_stream_name.sql | 12 +- ...ict_stream_name___conflict_stream_name.sql | 12 +- ...flict_stream_name_conflict_stream_name.sql | 8 +- .../conflict_stream_scalar.sql | 12 +- ...ested_stream_wi__lting_into_long_names.sql | 16 +-- .../test_normalization/unnest_alias.sql | 8 +- ...t_alias_childre__column___with__quotes.sql | 12 +- .../unnest_alias_children.sql | 10 +- .../unnest_alias_children_owner.sql | 10 +- .../conflict_stream_name_ab3.sql | 2 +- ...t_stream_name_conflict_stream_name_ab3.sql | 2 +- ...ream_with_c___long_names_partition_ab1.sql | 2 +- ...ream_with_c___long_names_partition_ab2.sql | 2 +- ...ream_with_c___long_names_partition_ab3.sql | 2 +- ...ream_with_c___names_partition_data_ab1.sql | 2 +- ...ream_with_c___names_partition_data_ab2.sql | 2 +- ...ream_with_c___names_partition_data_ab3.sql | 2 +- ...ream_with_c__ion_double_array_data_ab1.sql | 2 +- ...ream_with_c__ion_double_array_data_ab2.sql | 2 +- 
...ream_with_c__ion_double_array_data_ab3.sql | 2 +- ...ream_with_c__lting_into_long_names_ab1.sql | 2 +- ...ream_with_c__lting_into_long_names_ab2.sql | 2 +- .../some_stream_that_was_empty_ab1.sql | 2 +- .../some_stream_that_was_empty_ab2.sql | 2 +- .../unnest_alias_children_ab3.sql | 2 +- ...ream_with_n__lting_into_long_names_ab1.sql | 2 +- ...ream_with_n__lting_into_long_names_ab2.sql | 2 +- ...ream_with_n__lting_into_long_names_ab3.sql | 2 +- ...ream_with_c__lting_into_long_names_scd.sql | 67 +++++++-- .../some_stream_that_was_empty_scd.sql | 67 +++++++-- ...d_stream_with_c___long_names_partition.sql | 2 +- ...d_stream_with_c___names_partition_data.sql | 2 +- ...d_stream_with_c__ion_double_array_data.sql | 2 +- ...d_stream_with_c__lting_into_long_names.sql | 2 +- ..._c__lting_into_long_names_scd_new_data.sql | 20 +++ ...ream_with_c__lting_into_long_names_stg.sql | 4 +- .../some_stream_that_was_empty.sql | 2 +- ...ome_stream_that_was_empty_scd_new_data.sql | 20 +++ .../some_stream_that_was_empty_stg.sql | 2 +- ...e_stream_with_n__lting_into_long_names.sql | 2 +- ..._c__lting_into_long_names_scd_new_data.sql | 15 ++ ...ome_stream_that_was_empty_scd_new_data.sql | 15 ++ .../conflict_stream_array.sql | 12 +- .../conflict_stream_name.sql | 12 +- ...ict_stream_name___conflict_stream_name.sql | 12 +- ...flict_stream_name_conflict_stream_name.sql | 8 +- .../conflict_stream_scalar.sql | 12 +- ...ested_stream_wi__lting_into_long_names.sql | 16 +-- .../test_normalization/unnest_alias.sql | 8 +- ...t_alias_childre__column___with__quotes.sql | 12 +- .../unnest_alias_children.sql | 10 +- .../unnest_alias_children_owner.sql | 10 +- .../test_simple_streams/dbt_project.yml | 80 ++++++----- .../test_simple_streams/first_dbt_project.yml | 104 +++++++++----- .../1_prefix_startwith_number_scd.sql | 18 +-- .../dedup_cdc_excluded_scd.sql | 28 +--- .../dedup_exchange_rate_scd.sql | 30 +--- .../multiple_column_names_conflicts_scd.sql | 18 +-- 
.../test_normalization/pos_dedup_cdcx_scd.sql | 32 +---- .../renamed_dedup_cdc_excluded_scd.sql | 18 +-- ...1_prefix_startwith_number_scd_new_data.sql | 13 ++ .../1_prefix_startwith_number_stg.sql | 14 +- .../dedup_cdc_excluded_scd_new_data.sql | 13 ++ .../dedup_cdc_excluded_stg.sql | 18 +-- .../dedup_exchange_rate_scd_new_data.sql | 13 ++ .../dedup_exchange_rate_stg.sql | 28 +--- ...le_column_names_conflicts_scd_new_data.sql | 13 ++ .../multiple_column_names_conflicts_stg.sql | 26 +--- .../pos_dedup_cdcx_scd_new_data.sql | 13 ++ .../test_normalization/pos_dedup_cdcx_stg.sql | 20 +-- ...enamed_dedup_cdc_excluded_scd_new_data.sql | 13 ++ .../renamed_dedup_cdc_excluded_stg.sql | 8 +- .../test_normalization/exchange_rate.sql | 34 +---- .../1_prefix_startwith_number_ab1.sql | 2 +- .../1_prefix_startwith_number_ab2.sql | 2 +- .../dedup_cdc_excluded_ab1.sql | 2 +- .../dedup_cdc_excluded_ab2.sql | 2 +- .../dedup_exchange_rate_ab1.sql | 2 +- .../dedup_exchange_rate_ab2.sql | 2 +- .../multiple_column_names_conflicts_ab1.sql | 2 +- .../multiple_column_names_conflicts_ab2.sql | 2 +- .../test_normalization/pos_dedup_cdcx_ab1.sql | 2 +- .../test_normalization/pos_dedup_cdcx_ab2.sql | 2 +- .../renamed_dedup_cdc_excluded_ab1.sql | 2 +- .../renamed_dedup_cdc_excluded_ab2.sql | 2 +- .../1_prefix_startwith_number_scd.sql | 67 +++++++-- .../dedup_cdc_excluded_scd.sql | 67 +++++++-- .../dedup_exchange_rate_scd.sql | 69 +++++++-- .../multiple_column_names_conflicts_scd.sql | 67 +++++++-- .../test_normalization/pos_dedup_cdcx_scd.sql | 67 +++++++-- .../renamed_dedup_cdc_excluded_scd.sql | 67 +++++++-- .../1_prefix_startwith_number.sql | 2 +- ...1_prefix_startwith_number_scd_new_data.sql | 20 +++ .../1_prefix_startwith_number_stg.sql | 2 +- .../test_normalization/dedup_cdc_excluded.sql | 2 +- .../dedup_cdc_excluded_scd_new_data.sql | 20 +++ .../dedup_cdc_excluded_stg.sql | 2 +- .../dedup_exchange_rate.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 20 +++ 
.../dedup_exchange_rate_stg.sql | 2 +- .../multiple_column_names_conflicts.sql | 2 +- ...le_column_names_conflicts_scd_new_data.sql | 20 +++ .../multiple_column_names_conflicts_stg.sql | 2 +- .../test_normalization/pos_dedup_cdcx.sql | 2 +- .../pos_dedup_cdcx_scd_new_data.sql | 20 +++ .../test_normalization/pos_dedup_cdcx_stg.sql | 2 +- .../renamed_dedup_cdc_excluded.sql | 2 +- ...enamed_dedup_cdc_excluded_scd_new_data.sql | 20 +++ .../renamed_dedup_cdc_excluded_stg.sql | 2 +- .../dedup_exchange_rate_ab1.sql | 2 +- .../dedup_exchange_rate_ab2.sql | 2 +- .../renamed_dedup_cdc_excluded_ab1.sql | 2 +- .../renamed_dedup_cdc_excluded_ab2.sql | 2 +- .../dedup_exchange_rate_scd.sql | 69 +++++++-- .../renamed_dedup_cdc_excluded_scd.sql | 67 +++++++-- .../dedup_exchange_rate.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 20 +++ .../dedup_exchange_rate_stg.sql | 2 +- .../renamed_dedup_cdc_excluded.sql | 2 +- ...enamed_dedup_cdc_excluded_scd_new_data.sql | 20 +++ .../renamed_dedup_cdc_excluded_stg.sql | 2 +- ...1_prefix_startwith_number_scd_new_data.sql | 15 ++ .../dedup_cdc_excluded_scd_new_data.sql | 15 ++ .../dedup_exchange_rate_scd_new_data.sql | 15 ++ ...le_column_names_conflicts_scd_new_data.sql | 15 ++ .../pos_dedup_cdcx_scd_new_data.sql | 15 ++ ...enamed_dedup_cdc_excluded_scd_new_data.sql | 15 ++ .../test_normalization/exchange_rate.sql | 34 +---- .../dedup_exchange_rate_scd_new_data.sql | 15 ++ ...enamed_dedup_cdc_excluded_scd_new_data.sql | 15 ++ .../test_normalization/exchange_rate.sql | 30 +--- .../test_nested_streams/dbt_project.yml | 134 +++++++++++++----- ..._columns_resulting_into_long_names_scd.sql | 6 +- ...ns_resulting_into_long_names_partition.sql | 6 +- ...sulting_into_long_names_partition_data.sql | 55 ++----- ...long_names_partition_double_array_data.sql | 55 ++----- ...resulting_into_long_names_scd_new_data.sql | 10 ++ ..._columns_resulting_into_long_names_ab1.sql | 2 +- ..._columns_resulting_into_long_names_ab2.sql | 2 +- 
...esulting_into_long_names_partition_ab1.sql | 2 +- ...ing_into_long_names_partition_data_ab1.sql | 2 +- ..._names_partition_double_array_data_ab1.sql | 2 +- ..._columns_resulting_into_long_names_scd.sql | 67 +++++++-- ...plex_columns_resulting_into_long_names.sql | 2 +- ...ns_resulting_into_long_names_partition.sql | 2 +- ...sulting_into_long_names_partition_data.sql | 2 +- ...long_names_partition_double_array_data.sql | 2 +- ...resulting_into_long_names_scd_new_data.sql | 20 +++ ...resulting_into_long_names_scd_new_data.sql | 10 ++ .../test_simple_streams/dbt_project.yml | 82 ++++++----- .../test_simple_streams/first_dbt_project.yml | 106 +++++++++----- .../dedup_exchange_rate_scd.sql | 10 +- .../test_normalization/exchange_rate.sql | 30 ++-- .../dedup_exchange_rate_scd_new_data.sql | 10 ++ .../dedup_exchange_rate_stg.sql | 26 ++-- .../multiple_column_names_conflicts_stg.sql | 20 +-- .../dedup_exchange_rate_ab1.sql | 2 +- .../dedup_exchange_rate_ab2.sql | 2 +- .../dedup_exchange_rate_scd.sql | 69 +++++++-- .../dedup_exchange_rate.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 20 +++ .../dedup_exchange_rate_stg.sql | 2 +- .../dedup_exchange_rate_ab1.sql | 2 +- .../dedup_exchange_rate_ab2.sql | 2 +- .../dedup_exchange_rate_scd.sql | 69 +++++++-- .../dedup_exchange_rate.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 20 +++ .../dedup_exchange_rate_stg.sql | 2 +- .../test_normalization/exchange_rate.sql | 30 ++-- .../dedup_exchange_rate_scd_new_data.sql | 10 ++ .../dedup_exchange_rate_stg.sql | 26 ++-- .../test_normalization/exchange_rate.sql | 28 ++-- .../dedup_exchange_rate_scd_new_data.sql | 10 ++ .../dedup_exchange_rate_stg.sql | 24 ++-- .../test_nested_streams/dbt_project.yml | 2 + ..._COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql | 2 +- ...RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql | 11 ++ ..._COLUMNS_RESULTING_INTO_LONG_NAMES_AB1.sql | 2 +- ..._COLUMNS_RESULTING_INTO_LONG_NAMES_AB2.sql | 2 +- ...ESULTING_INTO_LONG_NAMES_PARTITION_AB1.sql | 2 +- 
...ING_INTO_LONG_NAMES_PARTITION_DATA_AB1.sql | 2 +- ..._NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1.sql | 2 +- ...PLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql | 2 +- ...NS_RESULTING_INTO_LONG_NAMES_PARTITION.sql | 2 +- ...SULTING_INTO_LONG_NAMES_PARTITION_DATA.sql | 2 +- ...LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql | 2 +- ..._COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql | 67 +++++++-- ...RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql | 20 +++ ...RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql | 11 ++ .../test_simple_streams/dbt_project.yml | 6 + .../DEDUP_EXCHANGE_RATE_SCD.sql | 2 +- .../DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql | 11 ++ .../DEDUP_EXCHANGE_RATE_AB1.sql | 2 +- .../DEDUP_EXCHANGE_RATE_AB2.sql | 2 +- .../DEDUP_EXCHANGE_RATE.sql | 2 +- .../DEDUP_EXCHANGE_RATE_SCD.sql | 69 +++++++-- .../DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql | 20 +++ .../DEDUP_EXCHANGE_RATE_STG.sql | 2 +- .../DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql | 11 ++ 282 files changed, 3203 insertions(+), 1683 deletions(-) create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql create 
mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/dbt_project.yml index 7631ef356dc92..2d19183efb914 100755 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/dbt_project.yml @@ -1,45 +1,29 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" +name: airbyte_utils version: "1.0" config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! 
-model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - +profile: normalize +model-paths: + - models +docs-paths: + - docs +analysis-paths: + - analysis +test-paths: + - tests +seed-paths: + - data +macro-paths: + - macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: + - build + - dbt_modules quoting: database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted schema: false identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! 
models: airbyte_utils: +materialized: table @@ -57,7 +41,79 @@ models: airbyte_views: +tags: airbyte_internal_views +materialized: view - dispatch: - macro_namespace: dbt_utils - search_order: ["airbyte_utils", "dbt_utils"] + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + nested_stream_with_complex_columns_resulting_into_long_names_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_stg: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_scd: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names_ab1: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names_ab2: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names_ab3: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + some_stream_that_was_empty_ab1: 
test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_ab2: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_stg: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_scd_new_data: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_scd: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty: test_normalization._airbyte_raw_some_stream_that_was_empty + simple_stream_with_namespace_resulting_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_namespace_resulting_into_long_names_ab2: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_namespace_resulting_into_long_names_ab3: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_namespace_resulting_into_long_names: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_scalar_ab1: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab2: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab3: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_array_ab1: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array_ab2: test_normalization._airbyte_raw_conflict_stream_array + 
conflict_stream_array_ab3: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array: test_normalization._airbyte_raw_conflict_stream_array + unnest_alias_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias: test_normalization._airbyte_raw_unnest_alias + nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + conflict_stream_name_conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + unnest_alias_children_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children: test_normalization._airbyte_raw_unnest_alias + nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + 
nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + conflict_stream_name_conflict_stream_name_conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + unnest_alias_children_owner_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab3: test_normalization._airbyte_raw_unnest_alias + 
unnest_alias_children_owner: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes: test_normalization._airbyte_raw_unnest_alias diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index 59cf6d3a78044..eaaa727c2ab48 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -9,7 +9,7 @@ OPTIONS() as ( --- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') +-- depends on: `dataline-integration-testing`._airbyte_test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data` with input_data as ( diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql new file mode 100644 index 0000000000000..c31b4342f359b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql @@ -0,0 +1,11 @@ + + + create or replace view `dataline-integration-testing`._airbyte_test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data` + OPTIONS() + as +-- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') + +select * from `dataline-integration-testing`._airbyte_test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_stg` + +; + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql index 9f8d6b5f44c59..b988a169ef1f2 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql @@ -17,5 +17,5 @@ select from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} as table_alias -- nested_stream_with_complex_columns_resulting_into_long_names where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql index 12ad3a51c83d6..3c6ed6e761a2b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql @@ -17,5 +17,5 @@ select from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_ab1') }} -- 
nested_stream_with_complex_columns_resulting_into_long_names where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1.sql index 0dcbf25c475ce..3ada03a427fe1 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1.sql @@ -18,5 +18,5 @@ from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partit {{ cross_join_unnest('partition', 'DATA') }} where 1 = 1 and DATA is not null -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql index 7a6fbe78ed1cf..0734951e51265 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql @@ -17,5 +17,5 @@ from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') -- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition where 1 = 1 and {{ adapter.quote('partition') }} is not null -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql index 50893664fdb4a..912073c317273 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql @@ -18,5 +18,5 @@ from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partit {{ cross_join_unnest('partition', 'double_array_data') }} where 1 = 1 and double_array_data is not null -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index d814d04ecc61a..c7cab93202373 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -3,32 +3,71 @@ partition_by = {"field": "_airbyte_active_row", "data_type": "int64", 
"range": {"start": 0, "end": 1, "interval": 1}}, unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["drop view _airbyte_test_normalization.nested_stream_with_complex_columns_resulting_into_long_names_stg"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='nested_stream_with_complex_columns_resulting_into_long_names' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). 
+ delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key + from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) + select modified_ids._airbyte_unique_key from scd_active_rows + right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }}","drop view _airbyte_test_normalization.nested_stream_with_complex_columns_resulting_into_long_names_stg"], tags = [ "top-level" ] ) }} --- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') +-- depends on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} - -- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are 
new select distinct {{ dbt_utils.surrogate_key([ 'id', ]) }} as _airbyte_unique_key - from new_data + from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -42,7 +81,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from new_data + select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} union all select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql index 5009469d5e7b9..c0bd55eeb61d0 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql @@ -20,5 +20,5 @@ from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') -- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} where 1 = 1 and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql index 955c2a891bba8..f8cd174b2a5b7 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql @@ -17,5 +17,5 @@ select from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3') }} -- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ 
ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql index ac5be7d87262d..861e33d4859a1 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql @@ -16,5 +16,5 @@ select from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3') }} -- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql index 91f3e95fddbe0..c6b980124a5a6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql @@ -16,5 +16,5 @@ select from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3') }} -- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql new file mode 100644 index 0000000000000..c3d4e9d14cbde --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql @@ -0,0 +1,21 @@ +{{ config( + cluster_by = "_airbyte_emitted_at", + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} +-- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} +{% else %} +select * from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} +{% endif %} +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql new file mode 100644 index 0000000000000..c31b4342f359b --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql @@ -0,0 +1,11 @@ + + + create or replace view `dataline-integration-testing`._airbyte_test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data` + OPTIONS() + as +-- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') + +select * from `dataline-integration-testing`._airbyte_test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_stg` + +; + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/dbt_project.yml index 88dde818dd4dd..de82a6ed3bfa8 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/dbt_project.yml @@ -1,45 +1,29 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" +name: airbyte_utils version: "1.0" config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. 
-# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! -model-paths: ["modified_models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - +profile: normalize +model-paths: + - modified_models +docs-paths: + - docs +analysis-paths: + - analysis +test-paths: + - tests +seed-paths: + - data +macro-paths: + - macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: + - build + - dbt_modules quoting: database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted schema: false identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! 
models: airbyte_utils: +materialized: table @@ -57,7 +41,27 @@ models: airbyte_views: +tags: airbyte_internal_views +materialized: view - dispatch: - macro_namespace: dbt_utils - search_order: ["airbyte_utils", "dbt_utils"] + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate + exchange_rate: test_normalization._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd_new_data: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_dbt_project.yml index 7631ef356dc92..7ae3dd92eace7 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_dbt_project.yml @@ -1,45 +1,29 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" +name: airbyte_utils version: "1.0" config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! 
-model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - +profile: normalize +model-paths: + - models +docs-paths: + - docs +analysis-paths: + - analysis +test-paths: + - tests +seed-paths: + - data +macro-paths: + - macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: + - build + - dbt_modules quoting: database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted schema: false identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! 
models: airbyte_utils: +materialized: table @@ -57,7 +41,51 @@ models: airbyte_views: +tags: airbyte_internal_views +materialized: view - dispatch: - macro_namespace: dbt_utils - search_order: ["airbyte_utils", "dbt_utils"] + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate + exchange_rate: test_normalization._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd_new_data: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: 
test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded + pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_scd_new_data: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx + 1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_scd_new_data: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number + multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd_new_data: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index d7fd59df15b5d..1b35a3d334830 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -9,7 +9,7 @@ OPTIONS() as ( --- depends_on: ref('dedup_exchange_rate_stg') +-- depends on: `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_scd_new_data` with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..1ee899b1ca7d4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,11 @@ + + + create or replace view `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_scd_new_data` + OPTIONS() + as +-- depends_on: ref('dedup_exchange_rate_stg') + +select * from `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg` + +; + diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql index 0555b00e382a7..8ef08eb1d426d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -22,5 +22,5 @@ select from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql index 6df3dfdc25526..eb02cc4ecf859 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -22,5 +22,5 @@ select from {{ 
ref('dedup_exchange_rate_ab1') }} -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index cd673ea4b56cf..240e35e088fc9 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -3,21 +3,62 @@ partition_by = {"field": "_airbyte_active_row", "data_type": "int64", "range": {"start": 0, "end": 1, "interval": 1}}, unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'NZD', + ]) }} as _airbyte_unique_key + from {{ ref('dedup_exchange_rate_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) + select modified_ids._airbyte_unique_key from scd_active_rows + right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view {{ ref('dedup_exchange_rate_scd_new_data') }}","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends_on: ref('dedup_exchange_rate_stg') +-- depends on: {{ ref('dedup_exchange_rate_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct @@ -26,11 +67,11 @@ new_data_ids as ( 'currency', 'NZD', ]) }} as _airbyte_unique_key - from new_data + from {{ ref('dedup_exchange_rate_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ ref('dedup_exchange_rate_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -44,7 +85,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + select {{ 
dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from {{ ref('dedup_exchange_rate_scd_new_data') }} union all select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql index 3e23097c346f4..eb3c93754b6b0 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -25,5 +25,5 @@ from {{ ref('dedup_exchange_rate_scd') }} -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} where 1 = 1 and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..d4132ab83a4d3 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,21 @@ +{{ config( + cluster_by = "_airbyte_emitted_at", + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('dedup_exchange_rate_stg') }} +-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} +{% else %} +select * from {{ ref('dedup_exchange_rate_stg') }} +{% endif %} +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql index 1c8897f665eaf..45262775f20b1 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -22,5 +22,5 @@ select from {{ ref('dedup_exchange_rate_ab2') }} tmp -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ 
incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql index 13316b96ee548..b86bc98fe997f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -22,5 +22,5 @@ select from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql index 96b419dd87f9d..09146ddd1c9f8 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -22,5 +22,5 @@ select from {{ ref('dedup_exchange_rate_ab1') }} -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index ccec637092e39..78fd50f1b53d4 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -3,21 +3,62 @@ partition_by = {"field": "_airbyte_active_row", "data_type": "int64", "range": {"start": 0, "end": 1, "interval": 1}}, unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. 
+ Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'NZD', + ]) }} as _airbyte_unique_key + from {{ ref('dedup_exchange_rate_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) + select modified_ids._airbyte_unique_key from scd_active_rows + right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view {{ ref('dedup_exchange_rate_scd_new_data') }}","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends_on: ref('dedup_exchange_rate_stg') +-- depends on: {{ ref('dedup_exchange_rate_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct @@ -26,11 +67,11 @@ new_data_ids as ( 'currency', 'NZD', ]) }} as _airbyte_unique_key - from new_data + from {{ ref('dedup_exchange_rate_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ ref('dedup_exchange_rate_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -44,7 +85,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + select {{ 
dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from {{ ref('dedup_exchange_rate_scd_new_data') }} union all select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql index fabc0a638c025..96601fc9d2873 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -25,5 +25,5 @@ from {{ ref('dedup_exchange_rate_scd') }} -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} where 1 = 1 and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..d4132ab83a4d3 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,21 @@ +{{ config( + cluster_by = "_airbyte_emitted_at", + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('dedup_exchange_rate_stg') }} +-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} +{% else %} +select * from {{ ref('dedup_exchange_rate_stg') }} +{% endif %} +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql index c67573e91762e..da37e7dc7eaeb 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -22,5 +22,5 @@ select from {{ ref('dedup_exchange_rate_ab2') }} tmp -- dedup_exchange_rate where 1 = 1 -{{ 
incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..1ee899b1ca7d4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,11 @@ + + + create or replace view `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_scd_new_data` + OPTIONS() + as +-- depends_on: ref('dedup_exchange_rate_stg') + +select * from `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg` + +; + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..1ee899b1ca7d4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,11 @@ + + + create or replace view `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_scd_new_data` + OPTIONS() + as +-- depends_on: 
ref('dedup_exchange_rate_stg') + +select * from `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg` + +; + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/dbt_project.yml index db791a568a0b9..3854bb416d73e 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/dbt_project.yml @@ -1,45 +1,29 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" +name: airbyte_utils version: "1.0" config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `source-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! 
-source-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -data-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -modules-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - +profile: normalize +source-paths: + - models +docs-paths: + - docs +analysis-paths: + - analysis +test-paths: + - tests +data-paths: + - data +macro-paths: + - macros +target-path: ../build +log-path: ../logs +modules-path: /dbt +clean-targets: + - build + - dbt_modules quoting: database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted schema: false identifier: true - -# You can define configurations for models in the `source-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! 
models: airbyte_utils: +materialized: table @@ -49,8 +33,6 @@ models: +materialized: ephemeral airbyte_incremental: +tags: incremental_tables - # incremental is not enabled for MySql yet - #+materialized: incremental +materialized: table airbyte_tables: +tags: normalized_tables @@ -58,6 +40,76 @@ models: airbyte_views: +tags: airbyte_internal_views +materialized: view - vars: - dbt_utils_dispatch_list: ["airbyte_utils"] + dbt_utils_dispatch_list: + - airbyte_utils + json_column: _airbyte_data + models_to_source: + nested_stream_with_co_1g_into_long_names_ab1: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_1g_into_long_names_ab2: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_1g_into_long_names_stg: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_1ng_names_scd_new_data: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_1g_into_long_names_scd: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co__lting_into_long_names: test_normalization._airbyte_raw_nested_s__lting_into_long_names + non_nested_stream_wit_1g_into_long_names_ab1: test_normalization._airbyte_raw_non_nest__lting_into_long_names + non_nested_stream_wit_1g_into_long_names_ab2: test_normalization._airbyte_raw_non_nest__lting_into_long_names + non_nested_stream_wit_1g_into_long_names_ab3: test_normalization._airbyte_raw_non_nest__lting_into_long_names + non_nested_stream_wit__lting_into_long_names: test_normalization._airbyte_raw_non_nest__lting_into_long_names + some_stream_that_was_empty_ab1: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_ab2: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_stg: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_scd_new_data: 
test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_scd: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty: test_normalization._airbyte_raw_some_stream_that_was_empty + simple_stream_with_na_1g_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_s__lting_into_long_names + simple_stream_with_na_1g_into_long_names_ab2: test_normalization_namespace._airbyte_raw_simple_s__lting_into_long_names + simple_stream_with_na_1g_into_long_names_ab3: test_normalization_namespace._airbyte_raw_simple_s__lting_into_long_names + simple_stream_with_na__lting_into_long_names: test_normalization_namespace._airbyte_raw_simple_s__lting_into_long_names + conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_scalar_ab1: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab2: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab3: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_array_ab1: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array_ab2: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array_ab3: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array: test_normalization._airbyte_raw_conflict_stream_array + unnest_alias_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias: test_normalization._airbyte_raw_unnest_alias + 
nested_stream_with_co_2g_names_partition_ab1: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_2g_names_partition_ab2: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_2g_names_partition_ab3: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co___long_names_partition: test_normalization._airbyte_raw_nested_s__lting_into_long_names + conflict_stream_name__2flict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name__2flict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name__2flict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + unnest_alias_children_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children: test_normalization._airbyte_raw_unnest_alias + nested_stream_with_co_3double_array_data_ab1: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_3double_array_data_ab2: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_3double_array_data_ab3: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co__ion_double_array_data: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_3es_partition_data_ab1: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_3es_partition_data_ab2: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_3es_partition_data_ab3: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co___names_partition_data: 
test_normalization._airbyte_raw_nested_s__lting_into_long_names + conflict_stream_name__3flict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name__3flict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name__3flict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name____conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + unnest_alias_children_owner_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_4mn___with__quotes_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_4mn___with__quotes_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_4mn___with__quotes_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children__column___with__quotes: test_normalization._airbyte_raw_unnest_alias diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index e5f3e4859deba..a78a228cfe085 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -4,7 +4,7 @@ test_normalization.`nested_stream_with_co_1g_into_long_names_scd__dbt_tmp` as ( --- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') +-- depends on: _airbyte_test_normalization.`nested_stream_with_co_1ng_names_scd_new_data` with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql new file mode 100644 index 0000000000000..cadcb57e95c57 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql @@ -0,0 +1,9 @@ + + create view _airbyte_test_normalization.`nested_stream_with_co_1ng_names_scd_new_data__dbt_tmp` as ( + +-- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') + +select * from _airbyte_test_normalization.`nested_stream_with_co_1g_into_long_names_stg` + + + ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab1.sql index 22b025402fdce..d638e7a898ff3 100644 
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab1.sql @@ -15,5 +15,5 @@ select from {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} as table_alias -- nested_stream_with_co__lting_into_long_names where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab2.sql index 6f090707a2ba7..a86a84248a87c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab2.sql @@ -15,5 +15,5 @@ select from {{ ref('nested_stream_with_co_1g_into_long_names_ab1') }} -- nested_stream_with_co__lting_into_long_names where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_2g_names_partition_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_2g_names_partition_ab1.sql index a98153d35d87f..427a929211b27 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_2g_names_partition_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_2g_names_partition_ab1.sql @@ -15,5 +15,5 @@ from {{ ref('nested_stream_with_co_1g_into_long_names_scd') }} as table_alias -- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition where 1 = 1 and {{ adapter.quote('partition') }} is not null -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3double_array_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3double_array_data_ab1.sql index 2e8698e569518..a8ca4bbb7d40f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3double_array_data_ab1.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3double_array_data_ab1.sql @@ -16,5 +16,5 @@ from {{ ref('nested_stream_with_co___long_names_partition') }} as table_alias {{ cross_join_unnest('partition', 'double_array_data') }} where 1 = 1 and double_array_data is not null -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3es_partition_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3es_partition_data_ab1.sql index 241d66624840f..cdf1151ee10d7 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3es_partition_data_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3es_partition_data_ab1.sql @@ -16,5 +16,5 @@ from {{ ref('nested_stream_with_co___long_names_partition') }} as table_alias {{ cross_join_unnest('partition', adapter.quote('DATA')) }} where 1 = 1 and {{ adapter.quote('DATA') }} is not null -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index d0e8e603259f3..122714df22215 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -1,32 +1,71 @@ {{ config( unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["drop view _airbyte_test_normalization.nested_stream_with_co_1g_into_long_names_stg"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='nested_stream_with_co__lting_into_long_names' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. 
Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key + from {{ ref('nested_stream_with_co_1ng_names_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) + select modified_ids._airbyte_unique_key from scd_active_rows + right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view {{ ref('nested_stream_with_co_1ng_names_scd_new_data') }}","drop view _airbyte_test_normalization.nested_stream_with_co_1g_into_long_names_stg"], tags = [ "top-level" ] ) }} --- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') +-- depends on: {{ ref('nested_stream_with_co_1ng_names_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('nested_stream_with_co_1g_into_long_names_stg') }} - -- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ 'id', ]) }} as _airbyte_unique_key - from new_data + from {{ ref('nested_stream_with_co_1ng_names_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ ref('nested_stream_with_co_1ng_names_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -40,7 +79,7 @@ previous_active_scd_data as 
( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('nested_stream_with_co_1g_into_long_names_stg')) }} from new_data + select {{ dbt_utils.star(ref('nested_stream_with_co_1g_into_long_names_stg')) }} from {{ ref('nested_stream_with_co_1ng_names_scd_new_data') }} union all select {{ dbt_utils.star(ref('nested_stream_with_co_1g_into_long_names_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql index 155daecc1f2c1..0c8adc779de9f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql @@ -15,5 +15,5 @@ select from {{ ref('nested_stream_with_co_2g_names_partition_ab3') }} -- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ ref('nested_stream_with_co_1g_into_long_names_scd') }} where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql index 3dfd623645782..92e44abc92988 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql @@ -14,5 +14,5 @@ select from {{ ref('nested_stream_with_co_3es_partition_data_ab3') }} -- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('nested_stream_with_co___long_names_partition') }} where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql index 3bd5623a79870..6a17d6258b3e6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql @@ -14,5 +14,5 @@ select from {{ ref('nested_stream_with_co_3double_array_data_ab3') }} -- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('nested_stream_with_co___long_names_partition') }} where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql index f56a95685e589..0ea84390902e9 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql @@ -18,5 +18,5 @@ from {{ ref('nested_stream_with_co_1g_into_long_names_scd') }} -- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} where 1 = 1 and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql new file mode 100644 index 0000000000000..f240e35e6d7dd --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql @@ -0,0 +1,19 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('nested_stream_with_co_1g_into_long_names_stg') }} +-- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} +{% else %} +select * from {{ ref('nested_stream_with_co_1g_into_long_names_stg') }} +{% endif %} +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index e5f3e4859deba..a78a228cfe085 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -4,7 +4,7 @@ test_normalization.`nested_stream_with_co_1g_into_long_names_scd__dbt_tmp` as ( --- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') +-- depends on: _airbyte_test_normalization.`nested_stream_with_co_1ng_names_scd_new_data` with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql new file mode 100644 index 0000000000000..cadcb57e95c57 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql @@ -0,0 +1,9 @@ + + create view _airbyte_test_normalization.`nested_stream_with_co_1ng_names_scd_new_data__dbt_tmp` as ( + +-- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') + +select * from _airbyte_test_normalization.`nested_stream_with_co_1g_into_long_names_stg` + + + ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/dbt_project.yml 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/dbt_project.yml index db791a568a0b9..e47faa78dc483 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/dbt_project.yml @@ -1,45 +1,29 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" +name: airbyte_utils version: "1.0" config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `source-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! 
-source-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -data-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -modules-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - +profile: normalize +source-paths: + - models +docs-paths: + - docs +analysis-paths: + - analysis +test-paths: + - tests +data-paths: + - data +macro-paths: + - macros +target-path: ../build +log-path: ../logs +modules-path: /dbt +clean-targets: + - build + - dbt_modules quoting: database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted schema: false identifier: true - -# You can define configurations for models in the `source-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! 
models: airbyte_utils: +materialized: table @@ -49,8 +33,6 @@ models: +materialized: ephemeral airbyte_incremental: +tags: incremental_tables - # incremental is not enabled for MySql yet - #+materialized: incremental +materialized: table airbyte_tables: +tags: normalized_tables @@ -58,6 +40,48 @@ models: airbyte_views: +tags: airbyte_internal_views +materialized: view - vars: - dbt_utils_dispatch_list: ["airbyte_utils"] + dbt_utils_dispatch_list: + - airbyte_utils + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate + exchange_rate: test_normalization._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd_new_data: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded + 
dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded + pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_scd_new_data: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx + 1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_scd_new_data: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number + multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd_new_data: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 59d722cb4f381..98f20af5708e7 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -4,7 +4,7 @@ test_normalization.`dedup_exchange_rate_scd__dbt_tmp` as ( --- depends_on: ref('dedup_exchange_rate_stg') +-- depends on: _airbyte_test_normalization.`dedup_exchange_rate_scd_new_data` with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..9acd13c900265 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,9 @@ + + create view _airbyte_test_normalization.`dedup_exchange_rate_scd_new_data__dbt_tmp` as ( + +-- depends_on: ref('dedup_exchange_rate_stg') + +select * from _airbyte_test_normalization.`dedup_exchange_rate_stg` + 
+ + ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql index 9b09b69fc5c24..670db0869ae22 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -20,5 +20,5 @@ select from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql index ca93b9a8d5364..6ac42bbbe4769 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -23,5 +23,5 @@ select from {{ 
ref('dedup_exchange_rate_ab1') }} -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 9bf09bdcaa8ff..7de0f15fb0471 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -1,21 +1,62 @@ {{ config( unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. 
Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key + from {{ ref('dedup_exchange_rate_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) + select modified_ids._airbyte_unique_key from scd_active_rows + right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view {{ ref('dedup_exchange_rate_scd_new_data') }}","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends_on: ref('dedup_exchange_rate_stg') +-- depends on: {{ ref('dedup_exchange_rate_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct @@ -24,11 +65,11 @@ new_data_ids as ( 'currency', 'nzd', ]) }} as _airbyte_unique_key - from new_data + from {{ ref('dedup_exchange_rate_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ ref('dedup_exchange_rate_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -42,7 +83,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + select {{ 
dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from {{ ref('dedup_exchange_rate_scd_new_data') }} union all select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql index 07a2d8f3765c8..dd4432bd60a5e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -23,5 +23,5 @@ from {{ ref('dedup_exchange_rate_scd') }} -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} where 1 = 1 and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..da8774679c71f --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,19 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('dedup_exchange_rate_stg') }} +-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} +{% else %} +select * from {{ ref('dedup_exchange_rate_stg') }} +{% endif %} +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql index 57c500151e062..86ec2c9e8b1b7 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -20,5 +20,5 @@ select from {{ ref('dedup_exchange_rate_ab2') }} tmp -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 59d722cb4f381..98f20af5708e7 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -4,7 +4,7 @@ test_normalization.`dedup_exchange_rate_scd__dbt_tmp` as ( --- depends_on: ref('dedup_exchange_rate_stg') +-- depends on: _airbyte_test_normalization.`dedup_exchange_rate_scd_new_data` with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..9acd13c900265 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,9 @@ + + create view _airbyte_test_normalization.`dedup_exchange_rate_scd_new_data__dbt_tmp` as ( + +-- depends_on: ref('dedup_exchange_rate_stg') + +select * from 
_airbyte_test_normalization.`dedup_exchange_rate_stg` + + + ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/dbt_project.yml index 7ad95ea5f9414..9705c3694a774 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/dbt_project.yml @@ -1,43 +1,29 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" +name: airbyte_utils version: "1.0" config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `source-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! 
-source-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -data-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -modules-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - +profile: normalize +source-paths: + - models +docs-paths: + - docs +analysis-paths: + - analysis +test-paths: + - tests +data-paths: + - data +macro-paths: + - macros +target-path: ../build +log-path: ../logs +modules-path: /dbt +clean-targets: + - build + - dbt_modules quoting: database: false schema: false identifier: false - -# You can define configurations for models in the `source-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! models: airbyte_utils: +materialized: table @@ -47,8 +33,6 @@ models: +materialized: ephemeral airbyte_incremental: +tags: incremental_tables - # incremental is not enabled for Oracle yet - #+materialized: incremental +materialized: table airbyte_tables: +tags: normalized_tables @@ -56,6 +40,48 @@ models: airbyte_views: +tags: airbyte_internal_views +materialized: view - vars: - dbt_utils_dispatch_list: ["airbyte_utils"] + dbt_utils_dispatch_list: + - airbyte_utils + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization.airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization.airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization.airbyte_raw_exchange_rate + exchange_rate: test_normalization.airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization.airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization.airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization.airbyte_raw_dedup_exchange_rate + 
dedup_exchange_rate_scd_new_data: test_normalization.airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization.airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization.airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd_new_data: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization.airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization.airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization.airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd_new_data: test_normalization.airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: test_normalization.airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization.airbyte_raw_dedup_cdc_excluded + pos_dedup_cdcx_ab1: test_normalization.airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_ab2: test_normalization.airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_stg: test_normalization.airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_scd_new_data: test_normalization.airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_scd: test_normalization.airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx: test_normalization.airbyte_raw_pos_dedup_cdcx + ab_1_prefix_startwith_number_ab1: test_normalization.airbyte_raw_1_prefix_startwith_number + ab_1_prefix_startwith_number_ab2: test_normalization.airbyte_raw_1_prefix_startwith_number + ab_1_prefix_startwith_number_stg: test_normalization.airbyte_raw_1_prefix_startwith_number + ab_1_prefix_startwith_number_scd_new_data: 
test_normalization.airbyte_raw_1_prefix_startwith_number + ab_1_prefix_startwith_number_scd: test_normalization.airbyte_raw_1_prefix_startwith_number + ab_1_prefix_startwith_number: test_normalization.airbyte_raw_1_prefix_startwith_number + multiple_column_names_conflicts_ab1: test_normalization.airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_ab2: test_normalization.airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_stg: test_normalization.airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd_new_data: test_normalization.airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd: test_normalization.airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts: test_normalization.airbyte_raw_multiple_column_names_conflicts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index cfd186b006ae3..91f927adaeb64 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -4,7 +4,7 @@ as --- depends_on: ref('dedup_exchange_rate_stg') +-- depends on: test_normalization.dedup_exchange_rate_scd_new_data with input_data as ( diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..e0c751a0dadfe --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,9 @@ + + create view test_normalization.dedup_exchange_rate_scd_new_data__dbt_tmp as + +-- depends_on: ref('dedup_exchange_rate_stg') + +select * from test_normalization.dedup_exchange_rate_stg + + + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql index ccd95966bfc79..f6b2863d9c445 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -20,5 +20,5 @@ select from {{ source('test_normalization', 'airbyte_raw_dedup_exchange_rate') }} -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT')) }} +{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql index 97defa7b1ba23..f3158bc2e9193 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -20,5 +20,5 @@ select from {{ ref('dedup_exchange_rate_ab1') }} -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT')) }} +{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 712f6bd747522..c4348e9bb1e80 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -1,21 +1,62 @@ {{ config( unique_key = "{{ 
quote('_AIRBYTE_UNIQUE_KEY_SCD') }}", schema = "test_normalization", - post_hook = ["drop view test_normalization.dedup_exchange_rate_stg"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and quote('_AIRBYTE_UNIQUE_KEY') in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). 
+ delete from {{ final_table_relation }} where {{ final_table_relation }}.{{ quote('_AIRBYTE_UNIQUE_KEY') }} in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as {{ quote('_AIRBYTE_UNIQUE_KEY') }} + from {{ ref('dedup_exchange_rate_scd_new_data') }} + where 1=1 + {{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this.schema + '.' + quote('dedup_exchange_rate')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table.{{ quote('_AIRBYTE_UNIQUE_KEY') }} = modified_ids.{{ quote('_AIRBYTE_UNIQUE_KEY') }} + where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 + ) + select modified_ids.{{ quote('_AIRBYTE_UNIQUE_KEY') }} from scd_active_rows + right outer join modified_ids on modified_ids.{{ quote('_AIRBYTE_UNIQUE_KEY') }} = scd_active_rows.{{ quote('_AIRBYTE_UNIQUE_KEY') }} + group by modified_ids.{{ quote('_AIRBYTE_UNIQUE_KEY') }} + having count(scd_active_rows.{{ quote('_AIRBYTE_UNIQUE_KEY') }}) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view {{ ref('dedup_exchange_rate_scd_new_data') }}","drop view test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends_on: ref('dedup_exchange_rate_stg') +-- depends on: {{ ref('dedup_exchange_rate_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', 'airbyte_raw_dedup_exchange_rate') }} - where 1 = 1 - {{ incremental_clause(quote('_AIRBYTE_EMITTED_AT')) }} -), new_data_ids as ( -- build a subset of {{ quote('_AIRBYTE_UNIQUE_KEY') }} from rows that are new select distinct @@ -24,11 +65,11 @@ new_data_ids as ( 'currency', 'nzd', ]) }} as {{ quote('_AIRBYTE_UNIQUE_KEY') }} - from new_data + from {{ 
ref('dedup_exchange_rate_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ ref('dedup_exchange_rate_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -42,7 +83,7 @@ previous_active_scd_data as ( where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from {{ ref('dedup_exchange_rate_scd_new_data') }} union all select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql index e361c63648094..316e400418353 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -23,5 +23,5 @@ from {{ ref('dedup_exchange_rate_scd') }} -- dedup_exchange_rate from {{ source('test_normalization', 'airbyte_raw_dedup_exchange_rate') }} where 1 = 1 and {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 -{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT')) }} +{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..a52c371a98ec6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,19 @@ +{{ config( + unique_key = quote('_AIRBYTE_AB_ID'), + schema = "test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('dedup_exchange_rate_stg') }} +-- dedup_exchange_rate from {{ source('test_normalization', 'airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this) }} +{% else %} +select * from {{ ref('dedup_exchange_rate_stg') }} +{% endif %} +{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql index c78f87fe59f19..15c9c07d71e9a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -20,5 +20,5 @@ select from {{ ref('dedup_exchange_rate_ab2') }} tmp -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT')) }} +{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index cfd186b006ae3..91f927adaeb64 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -4,7 +4,7 @@ as --- depends_on: ref('dedup_exchange_rate_stg') +-- depends on: test_normalization.dedup_exchange_rate_scd_new_data with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..e0c751a0dadfe --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,9 @@ + + create view test_normalization.dedup_exchange_rate_scd_new_data__dbt_tmp as + +-- depends_on: ref('dedup_exchange_rate_stg') + +select * from test_normalization.dedup_exchange_rate_stg + + + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/dbt_project.yml index 7631ef356dc92..d2abaa759df50 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/dbt_project.yml @@ -1,45 +1,29 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" +name: airbyte_utils version: "1.0" config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! 
-model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - +profile: normalize +model-paths: + - models +docs-paths: + - docs +analysis-paths: + - analysis +test-paths: + - tests +seed-paths: + - data +macro-paths: + - macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: + - build + - dbt_modules quoting: database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted schema: false identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! 
models: airbyte_utils: +materialized: table @@ -57,7 +41,79 @@ models: airbyte_views: +tags: airbyte_internal_views +materialized: view - dispatch: - macro_namespace: dbt_utils - search_order: ["airbyte_utils", "dbt_utils"] + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + nested_stream_with_c__lting_into_long_names_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c__lting_into_long_names_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c__lting_into_long_names_stg: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c__lting_into_long_names_scd_new_data: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c__lting_into_long_names_scd: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c__lting_into_long_names: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + non_nested_stream_wi__lting_into_long_names_ab1: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_wi__lting_into_long_names_ab2: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_wi__lting_into_long_names_ab3: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_wi__lting_into_long_names: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + some_stream_that_was_empty_ab1: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_ab2: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_stg: 
test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_scd_new_data: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_scd: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty: test_normalization._airbyte_raw_some_stream_that_was_empty + simple_stream_with_n__lting_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_n__lting_into_long_names_ab2: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_n__lting_into_long_names_ab3: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_n__lting_into_long_names: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_scalar_ab1: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab2: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab3: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_array_ab1: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array_ab2: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array_ab3: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array: test_normalization._airbyte_raw_conflict_stream_array + unnest_alias_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_ab2: 
test_normalization._airbyte_raw_unnest_alias + unnest_alias_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias: test_normalization._airbyte_raw_unnest_alias + nested_stream_with_c___long_names_partition_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c___long_names_partition_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c___long_names_partition_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c___long_names_partition: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + conflict_stream_name_conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + unnest_alias_children_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children: test_normalization._airbyte_raw_unnest_alias + nested_stream_with_c__ion_double_array_data_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c__ion_double_array_data_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c__ion_double_array_data_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c__ion_double_array_data: 
test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c___names_partition_data_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c___names_partition_data_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c___names_partition_data_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c___names_partition_data: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + conflict_stream_name___conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name___conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name___conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name___conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + unnest_alias_children_owner_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner: test_normalization._airbyte_raw_unnest_alias + unnest_alias_childre__column___with__quotes_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_childre__column___with__quotes_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_childre__column___with__quotes_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_childre__column___with__quotes: test_normalization._airbyte_raw_unnest_alias diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql index b5d7f740ba6eb..2c023fc2cf413 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."nested_stream_with_c__lting_into_long_names_scd" as ( --- depends_on: ref('nested_stream_with_c__lting_into_long_names_stg') +-- depends on: "postgres"._airbyte_test_normalization."nested_stream_with_c__lting_into_long_names_scd_new_data" with input_data as ( @@ -16,11 +16,7 @@ input_data as ( scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select - md5(cast(coalesce(cast("id" as - varchar -), '') as - varchar -)) as _airbyte_unique_key, + md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, "id", "date", "partition", @@ -55,15 +51,7 @@ dedup_data as ( _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as - varchar -), '') || '-' || coalesce(cast(_airbyte_start_at as - varchar -), '') || '-' || coalesce(cast(_airbyte_emitted_at as - varchar -), '') as - varchar -)) as 
_airbyte_unique_key_scd, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, scd_data.* from scd_data ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql index 53ef64cb928a8..9864ea8e0bf21 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."some_stream_that_was_empty_scd" as ( --- depends_on: ref('some_stream_that_was_empty_stg') +-- depends on: "postgres"._airbyte_test_normalization."some_stream_that_was_empty_scd_new_data" with input_data as ( @@ -16,11 +16,7 @@ input_data as ( scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select - md5(cast(coalesce(cast("id" as - varchar -), '') as - varchar -)) as _airbyte_unique_key, + md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, "id", "date", "date" as _airbyte_start_at, @@ -54,15 +50,7 @@ dedup_data as ( _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key 
as - varchar -), '') || '-' || coalesce(cast(_airbyte_start_at as - varchar -), '') || '-' || coalesce(cast(_airbyte_emitted_at as - varchar -), '') as - varchar -)) as _airbyte_unique_key_scd, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, scd_data.* from scd_data ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql index bb7fbe5b2852e..c2170eeb4df25 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql @@ -40,15 +40,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__nested_stream_with_c___long_names_partition_ab2 select - md5(cast(coalesce(cast(_airbyte_nested_stre__nto_long_names_hashid as - varchar -), '') || '-' || coalesce(cast(double_array_data as - varchar -), '') || '-' || coalesce(cast("DATA" as - varchar -), '') as - varchar -)) as _airbyte_partition_hashid, + md5(cast(coalesce(cast(_airbyte_nested_stre__nto_long_names_hashid as text), '') || '-' || coalesce(cast(double_array_data as text), '') || '-' || coalesce(cast("DATA" as text), '') as 
text)) as _airbyte_partition_hashid, tmp.* from __dbt__cte__nested_stream_with_c___long_names_partition_ab2 tmp -- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql index 76d0f6c37973b..36a8a151153a7 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql @@ -31,9 +31,7 @@ and "DATA" is not null -- depends_on: __dbt__cte__nested_stream_with_c___names_partition_data_ab1 select _airbyte_partition_hashid, - cast(currency as - varchar -) as currency, + cast(currency as text) as currency, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -46,13 +44,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__nested_stream_with_c___names_partition_data_ab2 select - md5(cast(coalesce(cast(_airbyte_partition_hashid as - varchar -), '') || '-' || coalesce(cast(currency as - varchar -), '') as - varchar -)) as _airbyte_data_hashid, + md5(cast(coalesce(cast(_airbyte_partition_hashid as text), '') || '-' || coalesce(cast(currency as text), '') as text)) as _airbyte_data_hashid, tmp.* from 
__dbt__cte__nested_stream_with_c___names_partition_data_ab2 tmp -- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql index 7ffecd5d71c96..4b6ec78084879 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql @@ -31,9 +31,7 @@ and double_array_data is not null -- depends_on: __dbt__cte__nested_stream_with_c__ion_double_array_data_ab1 select _airbyte_partition_hashid, - cast("id" as - varchar -) as "id", + cast("id" as text) as "id", _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -46,13 +44,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__nested_stream_with_c__ion_double_array_data_ab2 select - md5(cast(coalesce(cast(_airbyte_partition_hashid as - varchar -), '') || '-' || coalesce(cast("id" as - varchar -), '') as - varchar -)) as _airbyte_double_array_data_hashid, + md5(cast(coalesce(cast(_airbyte_partition_hashid as text), '') || '-' || coalesce(cast("id" as text), '') as text)) as _airbyte_double_array_data_hashid, tmp.* from __dbt__cte__nested_stream_with_c__ion_double_array_data_ab2 tmp -- 
double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql new file mode 100644 index 0000000000000..678a936379361 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql @@ -0,0 +1,13 @@ + + + + create table "postgres"._airbyte_test_normalization."nested_stream_with_c__lting_into_long_names_scd_new_data" + as ( + +-- depends_on: ref('nested_stream_with_c__lting_into_long_names_stg') + +select * from "postgres"._airbyte_test_normalization."nested_stream_with_c__lting_into_long_names_stg" + + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql index 0b9498b274531..9062ea955a071 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql @@ -26,12 +26,8 @@ where 1 = 1 -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -- depends_on: __dbt__cte__nested_stream_with_c__lting_into_long_names_ab1 select - cast("id" as - varchar -) as "id", - cast("date" as - varchar -) as "date", + cast("id" as text) as "id", + cast("date" as text) as "date", cast("partition" as jsonb ) as "partition", @@ -45,15 +41,7 @@ where 1 = 1 )-- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__nested_stream_with_c__lting_into_long_names_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast("date" as - varchar -), '') || '-' || coalesce(cast("partition" as - varchar -), '') as - varchar -)) as _airbyte_nested_stre__nto_long_names_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast("partition" as text), '') as text)) as _airbyte_nested_stre__nto_long_names_hashid, tmp.* from __dbt__cte__nested_stream_with_c__lting_into_long_names_ab2 tmp -- nested_stream_with_c__lting_into_long_names diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql new file mode 100644 index 0000000000000..061fe986f6fcf --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql @@ -0,0 +1,13 @@ + + + + create table "postgres"._airbyte_test_normalization."some_stream_that_was_empty_scd_new_data" + as ( + +-- depends_on: ref('some_stream_that_was_empty_stg') + +select * from "postgres"._airbyte_test_normalization."some_stream_that_was_empty_stg" + + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql index a4af81ada08aa..e473519de41aa 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql @@ -23,12 +23,8 @@ where 1 = 1 -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -- depends_on: __dbt__cte__some_stream_that_was_empty_ab1 select - cast("id" as - varchar -) as "id", - cast("date" as - varchar -) as "date", + cast("id" as text) as "id", + cast("date" as text) as "date", _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -39,13 +35,7 @@ where 1 = 1 )-- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__some_stream_that_was_empty_ab2 select - 
md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast("date" as - varchar -), '') as - varchar -)) as _airbyte_some_stream_that_was_empty_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("date" as text), '') as text)) as _airbyte_some_stream_that_was_empty_hashid, tmp.* from __dbt__cte__some_stream_that_was_empty_ab2 tmp -- some_stream_that_was_empty diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql index ba7fb38537070..aea94f43825c1 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql @@ -23,12 +23,8 @@ where 1 = 1 -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -- depends_on: __dbt__cte__simple_stream_with_n__lting_into_long_names_ab1 select - cast("id" as - varchar -) as "id", - cast("date" as - varchar -) as "date", + cast("id" as text) as "id", + cast("date" as text) as "date", _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -41,13 +37,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__simple_stream_with_n__lting_into_long_names_ab2 select - 
md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast("date" as - varchar -), '') as - varchar -)) as _airbyte_simple_stre__nto_long_names_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("date" as text), '') as text)) as _airbyte_simple_stre__nto_long_names_hashid, tmp.* from __dbt__cte__simple_stream_with_n__lting_into_long_names_ab2 tmp -- simple_stream_with_n__lting_into_long_names diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_array.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_array.sql index d5c47531a891e..c1c6ab12a7b7c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_array.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_array.sql @@ -21,9 +21,7 @@ where 1 = 1 -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -- depends_on: __dbt__cte__conflict_stream_array_ab1 select - cast("id" as - varchar -) as "id", + cast("id" as text) as "id", conflict_stream_array, _airbyte_ab_id, _airbyte_emitted_at, @@ -36,13 +34,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__conflict_stream_array_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast(conflict_stream_array as - varchar -), '') as - varchar -)) as _airbyte_conflict_stream_array_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || 
coalesce(cast(conflict_stream_array as text), '') as text)) as _airbyte_conflict_stream_array_hashid, tmp.* from __dbt__cte__conflict_stream_array_ab2 tmp -- conflict_stream_array diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name.sql index dba6f29e197c0..ac5cffb8d00d9 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name.sql @@ -23,9 +23,7 @@ where 1 = 1 -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -- depends_on: __dbt__cte__conflict_stream_name_ab1 select - cast("id" as - varchar -) as "id", + cast("id" as text) as "id", cast(conflict_stream_name as jsonb ) as conflict_stream_name, @@ -40,13 +38,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__conflict_stream_name_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast(conflict_stream_name as - varchar -), '') as - varchar -)) as _airbyte_conflict_stream_name_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(conflict_stream_name as text), '') as text)) as _airbyte_conflict_stream_name_hashid, tmp.* from __dbt__cte__conflict_stream_name_ab2 tmp -- conflict_stream_name diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql index 55404b7974428..4aa2c420ed45d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql @@ -23,9 +23,7 @@ and conflict_stream_name is not null -- depends_on: __dbt__cte__conflict_stream_name___conflict_stream_name_ab1 select _airbyte_conflict_stream_name_2_hashid, - cast(groups as - varchar -) as groups, + cast(groups as text) as groups, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -37,13 +35,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__conflict_stream_name___conflict_stream_name_ab2 select - md5(cast(coalesce(cast(_airbyte_conflict_stream_name_2_hashid as - varchar -), '') || '-' || coalesce(cast(groups as - varchar -), '') as - varchar -)) as _airbyte_conflict_stream_name_3_hashid, + md5(cast(coalesce(cast(_airbyte_conflict_stream_name_2_hashid as text), '') || '-' || coalesce(cast(groups as text), '') as text)) as _airbyte_conflict_stream_name_3_hashid, tmp.* from __dbt__cte__conflict_stream_name___conflict_stream_name_ab2 tmp -- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql index ea9792be5a9fd..82dfb023674e5 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql @@ -39,13 +39,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__conflict_stream_name_conflict_stream_name_ab2 select - md5(cast(coalesce(cast(_airbyte_conflict_stream_name_hashid as - varchar -), '') || '-' || coalesce(cast(conflict_stream_name as - varchar -), '') as - varchar -)) as _airbyte_conflict_stream_name_2_hashid, + md5(cast(coalesce(cast(_airbyte_conflict_stream_name_hashid as text), '') || '-' || coalesce(cast(conflict_stream_name as text), '') as text)) as _airbyte_conflict_stream_name_2_hashid, tmp.* from __dbt__cte__conflict_stream_name_conflict_stream_name_ab2 tmp -- conflict_stream_name at conflict_stream_name/conflict_stream_name diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql index fec20e8f1d5e9..09a4fa01de977 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql @@ -21,9 +21,7 @@ where 1 = 1 -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -- depends_on: __dbt__cte__conflict_stream_scalar_ab1 select - cast("id" as - varchar -) as "id", + cast("id" as text) as "id", cast(conflict_stream_scalar as bigint ) as conflict_stream_scalar, @@ -38,13 +36,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__conflict_stream_scalar_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast(conflict_stream_scalar as - varchar -), '') as - varchar -)) as _airbyte_conflict_stream_scalar_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(conflict_stream_scalar as text), '') as text)) as _airbyte_conflict_stream_scalar_hashid, tmp.* from __dbt__cte__conflict_stream_scalar_ab2 tmp -- conflict_stream_scalar diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql index 
3b267eea4346c..31d2176c3888c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql @@ -21,12 +21,8 @@ where 1 = 1 -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -- depends_on: __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab1 select - cast("id" as - varchar -) as "id", - cast("date" as - varchar -) as "date", + cast("id" as text) as "id", + cast("date" as text) as "date", _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -38,13 +34,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast("date" as - varchar -), '') as - varchar -)) as _airbyte_non_nested___nto_long_names_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("date" as text), '') as text)) as _airbyte_non_nested___nto_long_names_hashid, tmp.* from __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab2 tmp -- non_nested_stream_wi__lting_into_long_names diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias.sql index 4a7cb02c98d01..7af2f04f81f87 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias.sql @@ -36,13 +36,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__unnest_alias_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast(children as - varchar -), '') as - varchar -)) as _airbyte_unnest_alias_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(children as text), '') as text)) as _airbyte_unnest_alias_hashid, tmp.* from __dbt__cte__unnest_alias_ab2 tmp -- unnest_alias diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql index a3cbb5c562e74..6688069a62f01 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql @@ -29,9 +29,7 @@ and "column`_'with""_quotes" is not null -- depends_on: __dbt__cte__unnest_alias_childre__column___with__quotes_ab1 select _airbyte_owner_hashid, - cast(currency as - varchar -) as 
currency, + cast(currency as text) as currency, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -43,13 +41,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__unnest_alias_childre__column___with__quotes_ab2 select - md5(cast(coalesce(cast(_airbyte_owner_hashid as - varchar -), '') || '-' || coalesce(cast(currency as - varchar -), '') as - varchar -)) as _airbyte_column___with__quotes_hashid, + md5(cast(coalesce(cast(_airbyte_owner_hashid as text), '') || '-' || coalesce(cast(currency as text), '') as text)) as _airbyte_column___with__quotes_hashid, tmp.* from __dbt__cte__unnest_alias_childre__column___with__quotes_ab2 tmp -- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children.sql index a67bbcdbc1efc..779394d5765dc 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children.sql @@ -49,15 +49,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__unnest_alias_children_ab2 select - md5(cast(coalesce(cast(_airbyte_unnest_alias_hashid as - varchar -), '') || '-' || coalesce(cast(ab_id as - varchar -), '') || '-' || coalesce(cast("owner" as - varchar -), '') as - varchar -)) as 
_airbyte_children_hashid, + md5(cast(coalesce(cast(_airbyte_unnest_alias_hashid as text), '') || '-' || coalesce(cast(ab_id as text), '') || '-' || coalesce(cast("owner" as text), '') as text)) as _airbyte_children_hashid, tmp.* from __dbt__cte__unnest_alias_children_ab2 tmp -- children at unnest_alias/children diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql index 860b4d724bbb2..651e1c11914eb 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql @@ -39,15 +39,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__unnest_alias_children_owner_ab2 select - md5(cast(coalesce(cast(_airbyte_children_hashid as - varchar -), '') || '-' || coalesce(cast(owner_id as - varchar -), '') || '-' || coalesce(cast("column`_'with""_quotes" as - varchar -), '') as - varchar -)) as _airbyte_owner_hashid, + md5(cast(coalesce(cast(_airbyte_children_hashid as text), '') || '-' || coalesce(cast(owner_id as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_owner_hashid, tmp.* from __dbt__cte__unnest_alias_children_owner_ab2 tmp -- owner at unnest_alias/children/owner diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab3.sql index 4e4705096dab8..78f7cfe9bea5e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab3.sql @@ -9,7 +9,7 @@ select {{ dbt_utils.surrogate_key([ adapter.quote('id'), - 'conflict_stream_name', + object_to_string('conflict_stream_name'), ]) }} as _airbyte_conflict_stream_name_hashid, tmp.* from {{ ref('conflict_stream_name_ab2') }} tmp diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab3.sql index 0c8e2992b9762..0892d61432767 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab3.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab3.sql @@ -8,7 +8,7 @@ select {{ dbt_utils.surrogate_key([ '_airbyte_conflict_stream_name_hashid', - 'conflict_stream_name', + object_to_string('conflict_stream_name'), ]) }} as _airbyte_conflict_stream_name_2_hashid, tmp.* from {{ ref('conflict_stream_name_conflict_stream_name_ab2') }} tmp diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab1.sql index 6be1492d1a763..fafabe2d98407 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab1.sql @@ -16,5 +16,5 @@ from {{ ref('nested_stream_with_c__lting_into_long_names_scd') }} as table_alias -- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition where 1 = 1 and {{ adapter.quote('partition') }} is not null -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab2.sql index 34c79fa90c6ae..a622952dbeff9 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab2.sql @@ -15,5 +15,5 @@ select from {{ ref('nested_stream_with_c___long_names_partition_ab1') }} -- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab3.sql index 71d2f61739ebc..3eb1b81838277 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab3.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab3.sql @@ -15,5 +15,5 @@ select from {{ ref('nested_stream_with_c___long_names_partition_ab2') }} tmp -- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab1.sql index 6f510faef59b2..0aab8469aefd2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab1.sql @@ -17,5 +17,5 @@ from {{ ref('nested_stream_with_c___long_names_partition') }} as table_alias {{ cross_join_unnest('partition', adapter.quote('DATA')) }} where 1 = 1 and {{ adapter.quote('DATA') }} is not null -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab2.sql index 916726e052fda..f6cb35f7d406b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab2.sql @@ -14,5 +14,5 @@ select from {{ ref('nested_stream_with_c___names_partition_data_ab1') }} -- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab3.sql index c50169f54edef..f06e21a1432e6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab3.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab3.sql @@ -14,5 +14,5 @@ select from {{ ref('nested_stream_with_c___names_partition_data_ab2') }} tmp -- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab1.sql index 193f3ba04dddd..5f674cdcd1a69 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab1.sql @@ -17,5 +17,5 @@ from {{ ref('nested_stream_with_c___long_names_partition') }} as table_alias {{ cross_join_unnest('partition', 'double_array_data') }} where 1 = 1 and double_array_data is not null -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab2.sql index b66908781226a..6d785589955da 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab2.sql @@ -14,5 +14,5 @@ select from {{ ref('nested_stream_with_c__ion_double_array_data_ab1') }} -- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab3.sql index c586286df6d31..c83657e465f6f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab3.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab3.sql @@ -14,5 +14,5 @@ select from {{ ref('nested_stream_with_c__ion_double_array_data_ab2') }} tmp -- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab1.sql index 49ae7cb8fc1f5..767a1071f1745 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab1.sql @@ -16,5 +16,5 @@ select from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} as table_alias -- nested_stream_with_c__lting_into_long_names where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab2.sql index 9971fec8280c2..6739cf914f383 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab2.sql @@ -16,5 +16,5 @@ select from {{ ref('nested_stream_with_c__lting_into_long_names_ab1') }} -- nested_stream_with_c__lting_into_long_names where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab1.sql index 1f6710e4f97a5..6862a6ac2688c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab1.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab1.sql @@ -15,5 +15,5 @@ select from {{ source('test_normalization', '_airbyte_raw_some_stream_that_was_empty') }} as table_alias -- some_stream_that_was_empty where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab2.sql index ab64cad9c7328..258f8b697b564 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab2.sql @@ -15,5 +15,5 @@ select from {{ ref('some_stream_that_was_empty_ab1') }} -- some_stream_that_was_empty where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab3.sql index 
e262bd8da748c..e5a3aa0268c54 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab3.sql @@ -9,7 +9,7 @@ select {{ dbt_utils.surrogate_key([ '_airbyte_unnest_alias_hashid', 'ab_id', - adapter.quote('owner'), + object_to_string(adapter.quote('owner')), ]) }} as _airbyte_children_hashid, tmp.* from {{ ref('unnest_alias_children_ab2') }} tmp diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab1.sql index a77b0f0ac7278..b732876827659 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab1.sql @@ -15,5 +15,5 @@ select from {{ source('test_normalization_namespace', '_airbyte_raw_simple_stream_with_namespace_resulting_into_long_names') }} as table_alias -- simple_stream_with_n__lting_into_long_names where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', 
this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab2.sql index b19efa39ea610..a2f35bfcefb1c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab2.sql @@ -15,5 +15,5 @@ select from {{ ref('simple_stream_with_n__lting_into_long_names_ab1') }} -- simple_stream_with_n__lting_into_long_names where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab3.sql index 3ab506bf52d12..231ba585f7024 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab3.sql @@ -15,5 +15,5 @@ select from {{ ref('simple_stream_with_n__lting_into_long_names_ab2') }} tmp -- simple_stream_with_n__lting_into_long_names where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql index 03e7d58bbeab2..b854279937925 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql @@ -2,32 +2,71 @@ indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["delete from 
_airbyte_test_normalization.nested_stream_with_c__lting_into_long_names_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.nested_stream_with_c__lting_into_long_names_stg)"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='nested_stream_with_c__lting_into_long_names' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). 
+ delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from {{ ref('nested_stream_with_c__lting_into_long_names_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('nested_stream_with_c__lting_into_long_names')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) + select modified_ids._airbyte_unique_key from scd_active_rows + right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from {{ ref('nested_stream_with_c__lting_into_long_names_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('nested_stream_with_c__lting_into_long_names_scd_new_data') }})","delete from _airbyte_test_normalization.nested_stream_with_c__lting_into_long_names_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.nested_stream_with_c__lting_into_long_names_stg)"], tags = [ "top-level" ] ) }} --- depends_on: ref('nested_stream_with_c__lting_into_long_names_stg') +-- depends on: {{ ref('nested_stream_with_c__lting_into_long_names_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('nested_stream_with_c__lting_into_long_names_stg') }} - -- nested_stream_with_c__lting_into_long_names from {{ source('test_normalization', 
'_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ adapter.quote('id'), ]) }} as _airbyte_unique_key - from new_data + from {{ ref('nested_stream_with_c__lting_into_long_names_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ ref('nested_stream_with_c__lting_into_long_names_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -41,7 +80,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('nested_stream_with_c__lting_into_long_names_stg')) }} from new_data + select {{ dbt_utils.star(ref('nested_stream_with_c__lting_into_long_names_stg')) }} from {{ ref('nested_stream_with_c__lting_into_long_names_scd_new_data') }} union all select {{ dbt_utils.star(ref('nested_stream_with_c__lting_into_long_names_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql index 0caa4d9bfc659..6518047a27dd4 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql @@ -2,32 +2,71 @@ indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["delete from _airbyte_test_normalization.some_stream_that_was_empty_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.some_stream_that_was_empty_stg)"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='some_stream_that_was_empty' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). 
+ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from {{ ref('some_stream_that_was_empty_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('some_stream_that_was_empty')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) + select modified_ids._airbyte_unique_key from scd_active_rows + right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from {{ ref('some_stream_that_was_empty_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('some_stream_that_was_empty_scd_new_data') }})","delete from _airbyte_test_normalization.some_stream_that_was_empty_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.some_stream_that_was_empty_stg)"], tags = [ "top-level" ] ) }} --- depends_on: ref('some_stream_that_was_empty_stg') +-- depends on: {{ ref('some_stream_that_was_empty_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ 
ref('some_stream_that_was_empty_stg') }} - -- some_stream_that_was_empty from {{ source('test_normalization', '_airbyte_raw_some_stream_that_was_empty') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ adapter.quote('id'), ]) }} as _airbyte_unique_key - from new_data + from {{ ref('some_stream_that_was_empty_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ ref('some_stream_that_was_empty_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -41,7 +80,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('some_stream_that_was_empty_stg')) }} from new_data + select {{ dbt_utils.star(ref('some_stream_that_was_empty_stg')) }} from {{ ref('some_stream_that_was_empty_scd_new_data') }} union all select {{ dbt_utils.star(ref('some_stream_that_was_empty_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql index c79a3b8f56cba..92e9c5d4fe088 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql @@ -16,5 +16,5 @@ select from {{ ref('nested_stream_with_c___long_names_partition_ab3') }} -- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ ref('nested_stream_with_c__lting_into_long_names_scd') }} where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql index 428b290262c64..f453cd838e21f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql @@ -15,5 +15,5 @@ select from {{ ref('nested_stream_with_c___names_partition_data_ab3') }} -- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('nested_stream_with_c___long_names_partition') }} where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql index db604519f873d..ea7bc2e780956 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql @@ -15,5 +15,5 @@ select from {{ ref('nested_stream_with_c__ion_double_array_data_ab3') }} -- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('nested_stream_with_c___long_names_partition') }} where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql index 83d89faed2fa0..26c3aded7063d 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql @@ -19,5 +19,5 @@ from {{ ref('nested_stream_with_c__lting_into_long_names_scd') }} -- nested_stream_with_c__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} where 1 = 1 and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql new file mode 100644 index 0000000000000..d148a2b0c5033 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('nested_stream_with_c__lting_into_long_names_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data 
+select + * +from {{ ref('nested_stream_with_c__lting_into_long_names_stg') }} +-- nested_stream_with_c__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} +{% else %} +select * from {{ ref('nested_stream_with_c__lting_into_long_names_stg') }} +{% endif %} +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql index 69a21c2c6bff3..8249fe95741a4 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql @@ -10,11 +10,11 @@ select {{ dbt_utils.surrogate_key([ adapter.quote('id'), adapter.quote('date'), - adapter.quote('partition'), + object_to_string(adapter.quote('partition')), ]) }} as _airbyte_nested_stre__nto_long_names_hashid, tmp.* from {{ ref('nested_stream_with_c__lting_into_long_names_ab2') }} tmp -- nested_stream_with_c__lting_into_long_names where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql index 4c66d140893ac..23bcd85bcf91c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql @@ -18,5 +18,5 @@ from {{ ref('some_stream_that_was_empty_scd') }} -- some_stream_that_was_empty from {{ source('test_normalization', '_airbyte_raw_some_stream_that_was_empty') }} where 1 = 1 and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql new file mode 100644 index 0000000000000..90f82517a2105 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = 
[{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('some_stream_that_was_empty_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('some_stream_that_was_empty_stg') }} +-- some_stream_that_was_empty from {{ source('test_normalization', '_airbyte_raw_some_stream_that_was_empty') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} +{% else %} +select * from {{ ref('some_stream_that_was_empty_stg') }} +{% endif %} +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql index a5849d296b633..ca645527eca86 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql @@ -15,5 +15,5 @@ select from {{ ref('some_stream_that_was_empty_ab2') }} tmp -- some_stream_that_was_empty where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql index e0900f1be28e3..7f70fc83c6163 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql @@ -16,5 +16,5 @@ select from {{ ref('simple_stream_with_n__lting_into_long_names_ab3') }} -- simple_stream_with_n__lting_into_long_names from {{ source('test_normalization_namespace', '_airbyte_raw_simple_stream_with_namespace_resulting_into_long_names') }} where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql new file mode 100644 index 0000000000000..68cf364dae0c2 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."nested_stream_with_c__lting_into_long_names_scd_new_data" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "nested_stream_with_c__lting_into_long_name__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."nested_stream_with_c__lting_into_long_names_scd_new_data" ("_airbyte_nested_stre__nto_long_names_hashid", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_nested_stre__nto_long_names_hashid", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "nested_stream_with_c__lting_into_long_name__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql new file mode 100644 index 0000000000000..2c2743107273e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."some_stream_that_was_empty_scd_new_data" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "some_stream_that_was_empty_scd_new_data__dbt_tmp" + ); + + + insert into 
"postgres"._airbyte_test_normalization."some_stream_that_was_empty_scd_new_data" ("_airbyte_some_stream_that_was_empty_hashid", "id", "date", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_some_stream_that_was_empty_hashid", "id", "date", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "some_stream_that_was_empty_scd_new_data__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_array.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_array.sql index d5c47531a891e..c1c6ab12a7b7c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_array.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_array.sql @@ -21,9 +21,7 @@ where 1 = 1 -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -- depends_on: __dbt__cte__conflict_stream_array_ab1 select - cast("id" as - varchar -) as "id", + cast("id" as text) as "id", conflict_stream_array, _airbyte_ab_id, _airbyte_emitted_at, @@ -36,13 +34,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__conflict_stream_array_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast(conflict_stream_array as - varchar -), '') as - varchar -)) as _airbyte_conflict_stream_array_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || 
coalesce(cast(conflict_stream_array as text), '') as text)) as _airbyte_conflict_stream_array_hashid, tmp.* from __dbt__cte__conflict_stream_array_ab2 tmp -- conflict_stream_array diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name.sql index dba6f29e197c0..ac5cffb8d00d9 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name.sql @@ -23,9 +23,7 @@ where 1 = 1 -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -- depends_on: __dbt__cte__conflict_stream_name_ab1 select - cast("id" as - varchar -) as "id", + cast("id" as text) as "id", cast(conflict_stream_name as jsonb ) as conflict_stream_name, @@ -40,13 +38,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__conflict_stream_name_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast(conflict_stream_name as - varchar -), '') as - varchar -)) as _airbyte_conflict_stream_name_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(conflict_stream_name as text), '') as text)) as _airbyte_conflict_stream_name_hashid, tmp.* from __dbt__cte__conflict_stream_name_ab2 tmp -- conflict_stream_name diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql index 55404b7974428..4aa2c420ed45d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql @@ -23,9 +23,7 @@ and conflict_stream_name is not null -- depends_on: __dbt__cte__conflict_stream_name___conflict_stream_name_ab1 select _airbyte_conflict_stream_name_2_hashid, - cast(groups as - varchar -) as groups, + cast(groups as text) as groups, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -37,13 +35,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__conflict_stream_name___conflict_stream_name_ab2 select - md5(cast(coalesce(cast(_airbyte_conflict_stream_name_2_hashid as - varchar -), '') || '-' || coalesce(cast(groups as - varchar -), '') as - varchar -)) as _airbyte_conflict_stream_name_3_hashid, + md5(cast(coalesce(cast(_airbyte_conflict_stream_name_2_hashid as text), '') || '-' || coalesce(cast(groups as text), '') as text)) as _airbyte_conflict_stream_name_3_hashid, tmp.* from __dbt__cte__conflict_stream_name___conflict_stream_name_ab2 tmp -- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql index ea9792be5a9fd..82dfb023674e5 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql @@ -39,13 +39,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__conflict_stream_name_conflict_stream_name_ab2 select - md5(cast(coalesce(cast(_airbyte_conflict_stream_name_hashid as - varchar -), '') || '-' || coalesce(cast(conflict_stream_name as - varchar -), '') as - varchar -)) as _airbyte_conflict_stream_name_2_hashid, + md5(cast(coalesce(cast(_airbyte_conflict_stream_name_hashid as text), '') || '-' || coalesce(cast(conflict_stream_name as text), '') as text)) as _airbyte_conflict_stream_name_2_hashid, tmp.* from __dbt__cte__conflict_stream_name_conflict_stream_name_ab2 tmp -- conflict_stream_name at conflict_stream_name/conflict_stream_name diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql index fec20e8f1d5e9..09a4fa01de977 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql @@ -21,9 +21,7 @@ where 1 = 1 -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -- depends_on: __dbt__cte__conflict_stream_scalar_ab1 select - cast("id" as - varchar -) as "id", + cast("id" as text) as "id", cast(conflict_stream_scalar as bigint ) as conflict_stream_scalar, @@ -38,13 +36,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__conflict_stream_scalar_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast(conflict_stream_scalar as - varchar -), '') as - varchar -)) as _airbyte_conflict_stream_scalar_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(conflict_stream_scalar as text), '') as text)) as _airbyte_conflict_stream_scalar_hashid, tmp.* from __dbt__cte__conflict_stream_scalar_ab2 tmp -- conflict_stream_scalar diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql index 
3b267eea4346c..31d2176c3888c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql @@ -21,12 +21,8 @@ where 1 = 1 -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -- depends_on: __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab1 select - cast("id" as - varchar -) as "id", - cast("date" as - varchar -) as "date", + cast("id" as text) as "id", + cast("date" as text) as "date", _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -38,13 +34,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast("date" as - varchar -), '') as - varchar -)) as _airbyte_non_nested___nto_long_names_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("date" as text), '') as text)) as _airbyte_non_nested___nto_long_names_hashid, tmp.* from __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab2 tmp -- non_nested_stream_wi__lting_into_long_names diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias.sql index 4a7cb02c98d01..7af2f04f81f87 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias.sql @@ -36,13 +36,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__unnest_alias_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast(children as - varchar -), '') as - varchar -)) as _airbyte_unnest_alias_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(children as text), '') as text)) as _airbyte_unnest_alias_hashid, tmp.* from __dbt__cte__unnest_alias_ab2 tmp -- unnest_alias diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql index a3cbb5c562e74..6688069a62f01 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql @@ -29,9 +29,7 @@ and "column`_'with""_quotes" is not null -- depends_on: __dbt__cte__unnest_alias_childre__column___with__quotes_ab1 select _airbyte_owner_hashid, - cast(currency as - varchar 
-) as currency, + cast(currency as text) as currency, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -43,13 +41,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__unnest_alias_childre__column___with__quotes_ab2 select - md5(cast(coalesce(cast(_airbyte_owner_hashid as - varchar -), '') || '-' || coalesce(cast(currency as - varchar -), '') as - varchar -)) as _airbyte_column___with__quotes_hashid, + md5(cast(coalesce(cast(_airbyte_owner_hashid as text), '') || '-' || coalesce(cast(currency as text), '') as text)) as _airbyte_column___with__quotes_hashid, tmp.* from __dbt__cte__unnest_alias_childre__column___with__quotes_ab2 tmp -- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children.sql index a67bbcdbc1efc..779394d5765dc 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children.sql @@ -49,15 +49,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__unnest_alias_children_ab2 select - md5(cast(coalesce(cast(_airbyte_unnest_alias_hashid as - varchar -), '') || '-' || coalesce(cast(ab_id as - varchar -), '') || '-' || coalesce(cast("owner" as - varchar -), '') as - varchar -)) 
as _airbyte_children_hashid, + md5(cast(coalesce(cast(_airbyte_unnest_alias_hashid as text), '') || '-' || coalesce(cast(ab_id as text), '') || '-' || coalesce(cast("owner" as text), '') as text)) as _airbyte_children_hashid, tmp.* from __dbt__cte__unnest_alias_children_ab2 tmp -- children at unnest_alias/children diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql index 860b4d724bbb2..651e1c11914eb 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql @@ -39,15 +39,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__unnest_alias_children_owner_ab2 select - md5(cast(coalesce(cast(_airbyte_children_hashid as - varchar -), '') || '-' || coalesce(cast(owner_id as - varchar -), '') || '-' || coalesce(cast("column`_'with""_quotes" as - varchar -), '') as - varchar -)) as _airbyte_owner_hashid, + md5(cast(coalesce(cast(_airbyte_children_hashid as text), '') || '-' || coalesce(cast(owner_id as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_owner_hashid, tmp.* from __dbt__cte__unnest_alias_children_owner_ab2 tmp -- owner at unnest_alias/children/owner diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml index 88dde818dd4dd..de82a6ed3bfa8 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml @@ -1,45 +1,29 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" +name: airbyte_utils version: "1.0" config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! 
-model-paths: ["modified_models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - +profile: normalize +model-paths: + - modified_models +docs-paths: + - docs +analysis-paths: + - analysis +test-paths: + - tests +seed-paths: + - data +macro-paths: + - macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: + - build + - dbt_modules quoting: database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted schema: false identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! 
models: airbyte_utils: +materialized: table @@ -57,7 +41,27 @@ models: airbyte_views: +tags: airbyte_internal_views +materialized: view - dispatch: - macro_namespace: dbt_utils - search_order: ["airbyte_utils", "dbt_utils"] + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate + exchange_rate: test_normalization._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd_new_data: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml index 7631ef356dc92..7ae3dd92eace7 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml @@ -1,45 +1,29 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" +name: airbyte_utils version: "1.0" config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! 
-model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - +profile: normalize +model-paths: + - models +docs-paths: + - docs +analysis-paths: + - analysis +test-paths: + - tests +seed-paths: + - data +macro-paths: + - macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: + - build + - dbt_modules quoting: database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted schema: false identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! 
models: airbyte_utils: +materialized: table @@ -57,7 +41,51 @@ models: airbyte_views: +tags: airbyte_internal_views +materialized: view - dispatch: - macro_namespace: dbt_utils - search_order: ["airbyte_utils", "dbt_utils"] + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate + exchange_rate: test_normalization._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd_new_data: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: 
test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded + pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_scd_new_data: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx + 1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_scd_new_data: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number + multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd_new_data: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql index 203534b3d53b5..4631e0da21e19 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."1_prefix_startwith_number_scd" as ( --- depends_on: ref('1_prefix_startwith_number_stg') +-- depends on: "postgres"._airbyte_test_normalization."1_prefix_startwith_number_scd_new_data" with input_data as ( @@ -16,11 +16,7 @@ input_data as ( scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select - md5(cast(coalesce(cast("id" as - varchar -), '') as - varchar -)) as _airbyte_unique_key, + md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, "id", "date", "text", @@ -55,15 +51,7 @@ dedup_data as ( _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as - varchar -), '') || '-' || coalesce(cast(_airbyte_start_at as - varchar -), '') || '-' || coalesce(cast(_airbyte_emitted_at as - varchar -), '') as - varchar -)) as _airbyte_unique_key_scd, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, scd_data.* from scd_data ) diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index c9c2e087d956b..58751dbb4ce37 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."dedup_cdc_excluded_scd" as ( --- depends_on: ref('dedup_cdc_excluded_stg') +-- depends on: "postgres"._airbyte_test_normalization."dedup_cdc_excluded_scd_new_data" with input_data as ( @@ -16,11 +16,7 @@ input_data as ( scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select - md5(cast(coalesce(cast("id" as - varchar -), '') as - varchar -)) as _airbyte_unique_key, + md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, "id", "name", _ab_cdc_lsn, @@ -56,26 +52,10 @@ dedup_data as ( partition by _airbyte_unique_key, _airbyte_start_at, - _airbyte_emitted_at, cast(_ab_cdc_deleted_at as - varchar -), cast(_ab_cdc_updated_at as - varchar -) + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as text), cast(_ab_cdc_updated_at as text) order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as - varchar -), '') || '-' || coalesce(cast(_airbyte_start_at as - varchar -), '') || '-' || 
coalesce(cast(_airbyte_emitted_at as - varchar -), '') || '-' || coalesce(cast(_ab_cdc_deleted_at as - varchar -), '') || '-' || coalesce(cast(_ab_cdc_updated_at as - varchar -), '') as - varchar -)) as _airbyte_unique_key_scd, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') || '-' || coalesce(cast(_ab_cdc_deleted_at as text), '') || '-' || coalesce(cast(_ab_cdc_updated_at as text), '') as text)) as _airbyte_unique_key_scd, scd_data.* from scd_data ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 3db3150ff2766..abdce6b556e5b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."dedup_exchange_rate_scd" as ( --- depends_on: ref('dedup_exchange_rate_stg') +-- depends on: "postgres"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data" with input_data as ( @@ -16,15 +16,7 @@ input_data as ( scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast(currency as - varchar -), '') || '-' || 
coalesce(cast(nzd as - varchar -), '') as - varchar -)) as _airbyte_unique_key, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(nzd as text), '') as text)) as _airbyte_unique_key, "id", currency, "date", @@ -35,18 +27,14 @@ scd_data as ( usd, "date" as _airbyte_start_at, lag("date") over ( - partition by "id", currency, cast(nzd as - varchar -) + partition by "id", currency, cast(nzd as text) order by "date" is null asc, "date" desc, _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( - partition by "id", currency, cast(nzd as - varchar -) + partition by "id", currency, cast(nzd as text) order by "date" is null asc, "date" desc, @@ -68,15 +56,7 @@ dedup_data as ( _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as - varchar -), '') || '-' || coalesce(cast(_airbyte_start_at as - varchar -), '') || '-' || coalesce(cast(_airbyte_emitted_at as - varchar -), '') as - varchar -)) as _airbyte_unique_key_scd, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, scd_data.* from scd_data ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql index e94644c18a173..b40a2b6ba7b63 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."multiple_column_names_conflicts_scd" as ( --- depends_on: ref('multiple_column_names_conflicts_stg') +-- depends on: "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_scd_new_data" with input_data as ( @@ -16,11 +16,7 @@ input_data as ( scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select - md5(cast(coalesce(cast("id" as - varchar -), '') as - varchar -)) as _airbyte_unique_key, + md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, "id", "User Id", user_id, @@ -59,15 +55,7 @@ dedup_data as ( _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as - varchar -), '') || '-' || coalesce(cast(_airbyte_start_at as - varchar -), '') || '-' || coalesce(cast(_airbyte_emitted_at as - varchar -), '') as - varchar -)) as _airbyte_unique_key_scd, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, scd_data.* from scd_data ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql index 438b303238b5e..e6e68674b9cf2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."pos_dedup_cdcx_scd" as ( --- depends_on: ref('pos_dedup_cdcx_stg') +-- depends on: "postgres"._airbyte_test_normalization."pos_dedup_cdcx_scd_new_data" with input_data as ( @@ -16,11 +16,7 @@ input_data as ( scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select - md5(cast(coalesce(cast("id" as - varchar -), '') as - varchar -)) as _airbyte_unique_key, + md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, "id", "name", _ab_cdc_lsn, @@ -59,30 +55,10 @@ dedup_data as ( partition by _airbyte_unique_key, _airbyte_start_at, - _airbyte_emitted_at, cast(_ab_cdc_deleted_at as - varchar -), cast(_ab_cdc_updated_at as - varchar -), cast(_ab_cdc_log_pos as - varchar -) + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as text), cast(_ab_cdc_updated_at as text), cast(_ab_cdc_log_pos as text) order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as - varchar -), '') || '-' || coalesce(cast(_airbyte_start_at as - varchar -), '') || '-' || coalesce(cast(_airbyte_emitted_at as - varchar -), '') || '-' || coalesce(cast(_ab_cdc_deleted_at as - varchar -), '') || '-' || coalesce(cast(_ab_cdc_updated_at as - varchar -), '') 
|| '-' || coalesce(cast(_ab_cdc_log_pos as - varchar -), '') as - varchar -)) as _airbyte_unique_key_scd, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') || '-' || coalesce(cast(_ab_cdc_deleted_at as text), '') || '-' || coalesce(cast(_ab_cdc_updated_at as text), '') || '-' || coalesce(cast(_ab_cdc_log_pos as text), '') as text)) as _airbyte_unique_key_scd, scd_data.* from scd_data ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 414ed447cc0b5..2dc86c7a971f8 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" as ( --- depends_on: ref('renamed_dedup_cdc_excluded_stg') +-- depends on: "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_scd_new_data" with input_data as ( @@ -16,11 +16,7 @@ input_data as ( scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select - md5(cast(coalesce(cast("id" as - varchar -), '') as - varchar -)) as _airbyte_unique_key, + md5(cast(coalesce(cast("id" as text), '') 
as text)) as _airbyte_unique_key, "id", _ab_cdc_updated_at, _ab_cdc_updated_at as _airbyte_start_at, @@ -54,15 +50,7 @@ dedup_data as ( _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as - varchar -), '') || '-' || coalesce(cast(_airbyte_start_at as - varchar -), '') || '-' || coalesce(cast(_airbyte_emitted_at as - varchar -), '') as - varchar -)) as _airbyte_unique_key_scd, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, scd_data.* from scd_data ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql new file mode 100644 index 0000000000000..dac4eacc8fed9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql @@ -0,0 +1,13 @@ + + + + create table "postgres"._airbyte_test_normalization."1_prefix_startwith_number_scd_new_data" + as ( + +-- depends_on: ref('1_prefix_startwith_number_stg') + +select * from "postgres"._airbyte_test_normalization."1_prefix_startwith_number_stg" + + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql index 1d6a4096615f8..94b51fa8be0bd 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql @@ -30,9 +30,7 @@ select cast(nullif("date", '') as date ) as "date", - cast("text" as - varchar -) as "text", + cast("text" as text) as "text", _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -43,15 +41,7 @@ where 1 = 1 )-- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__1_prefix_startwith_number_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast("date" as - varchar -), '') || '-' || coalesce(cast("text" as - varchar -), '') as - varchar -)) as _airbyte_1_prefix_startwith_number_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast("text" as text), '') as text)) as _airbyte_1_prefix_startwith_number_hashid, tmp.* from __dbt__cte__1_prefix_startwith_number_ab2 tmp -- 1_prefix_startwith_number diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql new file mode 100644 
index 0000000000000..7b4aed326b37e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql @@ -0,0 +1,13 @@ + + + + create table "postgres"._airbyte_test_normalization."dedup_cdc_excluded_scd_new_data" + as ( + +-- depends_on: ref('dedup_cdc_excluded_stg') + +select * from "postgres"._airbyte_test_normalization."dedup_cdc_excluded_stg" + + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql index 6eaa134afe674..1c688fb2faa56 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql @@ -29,9 +29,7 @@ select cast("id" as bigint ) as "id", - cast("name" as - varchar -) as "name", + cast("name" as text) as "name", cast(_ab_cdc_lsn as float ) as _ab_cdc_lsn, @@ -51,19 +49,7 @@ where 1 = 1 )-- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__dedup_cdc_excluded_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast("name" as - varchar -), '') || '-' || coalesce(cast(_ab_cdc_lsn as - varchar -), '') || '-' || coalesce(cast(_ab_cdc_updated_at as - varchar -), '') || '-' || 
coalesce(cast(_ab_cdc_deleted_at as - varchar -), '') as - varchar -)) as _airbyte_dedup_cdc_excluded_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("name" as text), '') || '-' || coalesce(cast(_ab_cdc_lsn as text), '') || '-' || coalesce(cast(_ab_cdc_updated_at as text), '') || '-' || coalesce(cast(_ab_cdc_deleted_at as text), '') as text)) as _airbyte_dedup_cdc_excluded_hashid, tmp.* from __dbt__cte__dedup_cdc_excluded_ab2 tmp -- dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..4b181b8a132e8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,13 @@ + + + + create table "postgres"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data" + as ( + +-- depends_on: ref('dedup_exchange_rate_stg') + +select * from "postgres"._airbyte_test_normalization."dedup_exchange_rate_stg" + + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql index 55fe38117c0d7..128ec051327d6 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql @@ -32,9 +32,7 @@ select cast("id" as bigint ) as "id", - cast(currency as - varchar -) as currency, + cast(currency as text) as currency, cast(nullif("date", '') as date ) as "date", @@ -44,9 +42,7 @@ select cast("HKD@spéçiäl & characters" as float ) as "HKD@spéçiäl & characters", - cast(hkd_special___characters as - varchar -) as hkd_special___characters, + cast(hkd_special___characters as text) as hkd_special___characters, cast(nzd as float ) as nzd, @@ -63,25 +59,7 @@ where 1 = 1 )-- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__dedup_exchange_rate_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast(currency as - varchar -), '') || '-' || coalesce(cast("date" as - varchar -), '') || '-' || coalesce(cast(timestamp_col as - varchar -), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as - varchar -), '') || '-' || coalesce(cast(hkd_special___characters as - varchar -), '') || '-' || coalesce(cast(nzd as - varchar -), '') || '-' || coalesce(cast(usd as - varchar -), '') as - varchar -)) as _airbyte_dedup_exchange_rate_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') as text)) as _airbyte_dedup_exchange_rate_hashid, tmp.* from 
__dbt__cte__dedup_exchange_rate_ab2 tmp -- dedup_exchange_rate diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql new file mode 100644 index 0000000000000..c748f1f836ec1 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql @@ -0,0 +1,13 @@ + + + + create table "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_scd_new_data" + as ( + +-- depends_on: ref('multiple_column_names_conflicts_stg') + +select * from "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_stg" + + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql index 7a2c133f995f7..dbb4726faf8f3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql @@ -31,9 +31,7 @@ select cast("id" as bigint ) as "id", - cast("User Id" as - varchar -) as "User Id", + cast("User Id" as text) as "User Id", cast(user_id as float ) as user_id, @@ -43,9 +41,7 @@ select cast("user id" as float ) as "user id", - cast("User@Id" as - varchar -) as "User@Id", + cast("User@Id" as text) as "User@Id", cast(userid as float ) as userid, @@ -59,23 +55,7 @@ where 1 = 1 )-- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast("User Id" as - varchar -), '') || '-' || coalesce(cast(user_id as - varchar -), '') || '-' || coalesce(cast("User id" as - varchar -), '') || '-' || coalesce(cast("user id" as - varchar -), '') || '-' || coalesce(cast("User@Id" as - varchar -), '') || '-' || coalesce(cast(userid as - varchar -), '') as - varchar -)) as _airbyte_multiple_co__ames_conflicts_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("User Id" as text), '') || '-' || coalesce(cast(user_id as text), '') || '-' || coalesce(cast("User id" as text), '') || '-' || coalesce(cast("user id" as text), '') || '-' || coalesce(cast("User@Id" as text), '') || '-' || coalesce(cast(userid as text), '') as text)) as _airbyte_multiple_co__ames_conflicts_hashid, tmp.* from __dbt__cte__multiple_column_names_conflicts_ab2 tmp -- multiple_column_names_conflicts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql new file mode 100644 index 0000000000000..59e475b062bfb --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql @@ -0,0 +1,13 @@ + + + + create table "postgres"._airbyte_test_normalization."pos_dedup_cdcx_scd_new_data" + as ( + +-- depends_on: ref('pos_dedup_cdcx_stg') + +select * from "postgres"._airbyte_test_normalization."pos_dedup_cdcx_stg" + + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql index d3cbb9433c931..1b28a6bd09ddc 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql @@ -30,9 +30,7 @@ select cast("id" as bigint ) as "id", - cast("name" as - varchar -) as "name", + cast("name" as text) as "name", cast(_ab_cdc_lsn as float ) as _ab_cdc_lsn, @@ -55,21 +53,7 @@ where 1 = 1 )-- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__pos_dedup_cdcx_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || 
coalesce(cast("name" as - varchar -), '') || '-' || coalesce(cast(_ab_cdc_lsn as - varchar -), '') || '-' || coalesce(cast(_ab_cdc_updated_at as - varchar -), '') || '-' || coalesce(cast(_ab_cdc_deleted_at as - varchar -), '') || '-' || coalesce(cast(_ab_cdc_log_pos as - varchar -), '') as - varchar -)) as _airbyte_pos_dedup_cdcx_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("name" as text), '') || '-' || coalesce(cast(_ab_cdc_lsn as text), '') || '-' || coalesce(cast(_ab_cdc_updated_at as text), '') || '-' || coalesce(cast(_ab_cdc_deleted_at as text), '') || '-' || coalesce(cast(_ab_cdc_log_pos as text), '') as text)) as _airbyte_pos_dedup_cdcx_hashid, tmp.* from __dbt__cte__pos_dedup_cdcx_ab2 tmp -- pos_dedup_cdcx diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql new file mode 100644 index 0000000000000..2aa7f2b541197 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql @@ -0,0 +1,13 @@ + + + + create table "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_scd_new_data" + as ( + +-- depends_on: ref('renamed_dedup_cdc_excluded_stg') + +select * from "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" + + + ); + \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql index 8fb3cb3a5c344..7fba3805f3967 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql @@ -39,13 +39,7 @@ where 1 = 1 )-- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__renamed_dedup_cdc_excluded_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast(_ab_cdc_updated_at as - varchar -), '') as - varchar -)) as _airbyte_renamed_dedup_cdc_excluded_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(_ab_cdc_updated_at as text), '') as text)) as _airbyte_renamed_dedup_cdc_excluded_hashid, tmp.* from __dbt__cte__renamed_dedup_cdc_excluded_ab2 tmp -- renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql index 7d795f97e67e8..2a24e704fda26 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -31,9 +31,7 @@ select cast("id" as bigint ) as "id", - cast(currency as - varchar -) as currency, + cast(currency as text) as currency, cast(nullif("date", '') as date ) as "date", @@ -43,18 +41,14 @@ select cast("HKD@spéçiäl & characters" as float ) as "HKD@spéçiäl & characters", - cast(hkd_special___characters as - varchar -) as hkd_special___characters, + cast(hkd_special___characters as text) as hkd_special___characters, cast(nzd as float ) as nzd, cast(usd as float ) as usd, - cast("column`_'with""_quotes" as - varchar -) as "column`_'with""_quotes", + cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -66,27 +60,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__exchange_rate_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast(currency as - varchar -), '') || '-' || coalesce(cast("date" as - varchar -), '') || '-' || coalesce(cast(timestamp_col as - varchar -), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as - varchar -), '') || '-' || coalesce(cast(hkd_special___characters as - varchar -), '') || '-' || coalesce(cast(nzd as - varchar -), '') || '-' || coalesce(cast(usd as - varchar -), '') || '-' || coalesce(cast("column`_'with""_quotes" as - varchar -), '') as - varchar -)) as _airbyte_exchange_rate_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast(timestamp_col as text), '') 
|| '-' || coalesce(cast("HKD@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_exchange_rate_hashid, tmp.* from __dbt__cte__exchange_rate_ab2 tmp -- exchange_rate diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab1.sql index 080ffcc0b14c6..f6697dcec7577 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab1.sql @@ -16,5 +16,5 @@ select from {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} as table_alias -- 1_prefix_startwith_number where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab2.sql index 5402072233ba4..a9dd516725858 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab2.sql @@ -16,5 +16,5 @@ select from {{ ref('1_prefix_startwith_number_ab1') }} -- 1_prefix_startwith_number where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql index 5f212003c29ff..99a03831a8ba8 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql @@ -18,5 +18,5 @@ select from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} as table_alias -- dedup_cdc_excluded where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql index fb5d23a430df9..3d8803e27a664 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql @@ -18,5 +18,5 @@ select from {{ ref('dedup_cdc_excluded_ab1') }} -- dedup_cdc_excluded where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql index 49d750afb636e..5009554c3391c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -21,5 +21,5 @@ select from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql index a6f5b4c6fda63..187fc05ccc6fe 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -21,5 +21,5 @@ select from {{ ref('dedup_exchange_rate_ab1') }} -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab1.sql index 7268a550c1560..3444e2fe46f97 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab1.sql @@ -20,5 +20,5 @@ select from {{ 
source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} as table_alias -- multiple_column_names_conflicts where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab2.sql index afed155ffbd8d..263d011d1bdeb 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab2.sql @@ -20,5 +20,5 @@ select from {{ ref('multiple_column_names_conflicts_ab1') }} -- multiple_column_names_conflicts where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql index bb2d814a06204..ee8f1538acb46 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql @@ -19,5 +19,5 @@ select from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} as table_alias -- pos_dedup_cdcx where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql index ec0e36dbec138..96c252758b6d4 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql @@ -19,5 +19,5 @@ select from {{ ref('pos_dedup_cdcx_ab1') }} -- pos_dedup_cdcx where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql index e75261bd70a4e..fbe40aebf3c7c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql @@ -15,5 +15,5 @@ select from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} as table_alias -- renamed_dedup_cdc_excluded where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql index f7a91a73a73cb..f0b99802de8b2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql @@ -15,5 +15,5 @@ select from {{ ref('renamed_dedup_cdc_excluded_ab1') }} -- renamed_dedup_cdc_excluded where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ 
incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql index e2ade95cd401b..c1ebf5ef25874 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql @@ -2,32 +2,71 @@ indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["delete from _airbyte_test_normalization.{{ adapter.quote('1_prefix_startwith_number_stg') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.{{ adapter.quote('1_prefix_startwith_number_stg') }})"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='1_prefix_startwith_number' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from {{ ref('1_prefix_startwith_number_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('1_prefix_startwith_number')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) + select modified_ids._airbyte_unique_key from scd_active_rows + right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from {{ ref('1_prefix_startwith_number_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('1_prefix_startwith_number_scd_new_data') }})","delete from _airbyte_test_normalization.{{ adapter.quote('1_prefix_startwith_number_stg') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.{{ adapter.quote('1_prefix_startwith_number_stg') }})"], tags = [ "top-level" ] ) }} --- depends_on: ref('1_prefix_startwith_number_stg') +-- depends on: {{ ref('1_prefix_startwith_number_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('1_prefix_startwith_number_stg') }} - -- 1_prefix_startwith_number from {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ adapter.quote('id'), ]) }} as _airbyte_unique_key - from new_data + from {{ ref('1_prefix_startwith_number_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ 
ref('1_prefix_startwith_number_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -41,7 +80,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('1_prefix_startwith_number_stg')) }} from new_data + select {{ dbt_utils.star(ref('1_prefix_startwith_number_stg')) }} from {{ ref('1_prefix_startwith_number_scd_new_data') }} union all select {{ dbt_utils.star(ref('1_prefix_startwith_number_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 2fb3816fb87fa..61ed59ea7094f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -2,32 +2,71 @@ indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["delete from _airbyte_test_normalization.dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_cdc_excluded_stg)"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + 
database=this.database, + schema=this.schema, + identifier='dedup_cdc_excluded' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from {{ ref('dedup_cdc_excluded_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_cdc_excluded')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) + select modified_ids._airbyte_unique_key from scd_active_rows + right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from {{ ref('dedup_cdc_excluded_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('dedup_cdc_excluded_scd_new_data') }})","delete from _airbyte_test_normalization.dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_cdc_excluded_stg)"], tags = [ "top-level" ] ) }} --- depends_on: ref('dedup_cdc_excluded_stg') +-- depends on: {{ ref('dedup_cdc_excluded_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_cdc_excluded_stg') }} - -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ adapter.quote('id'), ]) }} as _airbyte_unique_key - from new_data + from {{ ref('dedup_cdc_excluded_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ ref('dedup_cdc_excluded_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date 
because of new changes @@ -41,7 +80,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from new_data + select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from {{ ref('dedup_cdc_excluded_scd_new_data') }} union all select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 7234b26c0f810..4a45ab83d50fe 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -2,21 +2,62 @@ indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["delete from _airbyte_test_normalization.dedup_exchange_rate_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_exchange_rate_stg)"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. 
+ Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key + from {{ ref('dedup_exchange_rate_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) + select modified_ids._airbyte_unique_key from scd_active_rows + right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from {{ ref('dedup_exchange_rate_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('dedup_exchange_rate_scd_new_data') }})","delete from _airbyte_test_normalization.dedup_exchange_rate_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_exchange_rate_stg)"], tags = [ "top-level" ] ) }} --- depends_on: ref('dedup_exchange_rate_stg') +-- depends on: {{ ref('dedup_exchange_rate_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct @@ -25,11 +66,11 @@ new_data_ids as ( 'currency', 'nzd', ]) }} as _airbyte_unique_key - from new_data + from {{ ref('dedup_exchange_rate_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ ref('dedup_exchange_rate_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be 
updated with an end date because of new changes @@ -43,7 +84,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from {{ ref('dedup_exchange_rate_scd_new_data') }} union all select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql index 736e25452ae3b..611fe2f676279 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql @@ -2,32 +2,71 @@ indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["delete from _airbyte_test_normalization.multiple_column_names_conflicts_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.multiple_column_names_conflicts_stg)"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='multiple_column_names_conflicts' + ) + %} + {# + If 
the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from {{ ref('multiple_column_names_conflicts_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('multiple_column_names_conflicts')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) + select modified_ids._airbyte_unique_key from scd_active_rows + right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from {{ ref('multiple_column_names_conflicts_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('multiple_column_names_conflicts_scd_new_data') }})","delete from _airbyte_test_normalization.multiple_column_names_conflicts_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.multiple_column_names_conflicts_stg)"], tags = [ "top-level" ] ) }} --- depends_on: ref('multiple_column_names_conflicts_stg') +-- depends on: {{ ref('multiple_column_names_conflicts_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('multiple_column_names_conflicts_stg') }} - -- multiple_column_names_conflicts from {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ adapter.quote('id'), ]) }} as _airbyte_unique_key - from new_data + from {{ ref('multiple_column_names_conflicts_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ 
ref('multiple_column_names_conflicts_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -41,7 +80,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('multiple_column_names_conflicts_stg')) }} from new_data + select {{ dbt_utils.star(ref('multiple_column_names_conflicts_stg')) }} from {{ ref('multiple_column_names_conflicts_scd_new_data') }} union all select {{ dbt_utils.star(ref('multiple_column_names_conflicts_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql index 1512b6fe8546a..302deb09d0ab5 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -2,32 +2,71 @@ indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["delete from _airbyte_test_normalization.pos_dedup_cdcx_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.pos_dedup_cdcx_stg)"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + 
database=this.database, + schema=this.schema, + identifier='pos_dedup_cdcx' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from {{ ref('pos_dedup_cdcx_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('pos_dedup_cdcx')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) + select modified_ids._airbyte_unique_key from scd_active_rows + right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from {{ ref('pos_dedup_cdcx_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('pos_dedup_cdcx_scd_new_data') }})","delete from _airbyte_test_normalization.pos_dedup_cdcx_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.pos_dedup_cdcx_stg)"], tags = [ "top-level" ] ) }} --- depends_on: ref('pos_dedup_cdcx_stg') +-- depends on: {{ ref('pos_dedup_cdcx_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('pos_dedup_cdcx_stg') }} - -- pos_dedup_cdcx from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ adapter.quote('id'), ]) }} as _airbyte_unique_key - from new_data + from {{ ref('pos_dedup_cdcx_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ ref('pos_dedup_cdcx_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -41,7 +80,7 @@ 
previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('pos_dedup_cdcx_stg')) }} from new_data + select {{ dbt_utils.star(ref('pos_dedup_cdcx_stg')) }} from {{ ref('pos_dedup_cdcx_scd_new_data') }} union all select {{ dbt_utils.star(ref('pos_dedup_cdcx_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 4fbd681d8ee61..c4c433cc3bd8b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -2,32 +2,71 @@ indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["delete from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg)"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='renamed_dedup_cdc_excluded' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. 
+ Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('renamed_dedup_cdc_excluded')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) + select modified_ids._airbyte_unique_key from scd_active_rows + right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }})","delete from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg)"], tags = [ "top-level" ] ) }} --- depends_on: ref('renamed_dedup_cdc_excluded_stg') +-- depends on: {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('renamed_dedup_cdc_excluded_stg') }} - -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ adapter.quote('id'), ]) }} as _airbyte_unique_key - from new_data + from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} where 1 = 0 ), 
previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -41,7 +80,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from new_data + select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} union all select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql index 77aba25edc2a7..f3ea9897b65a4 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql @@ -19,5 +19,5 @@ from {{ ref('1_prefix_startwith_number_scd') }} -- 1_prefix_startwith_number from {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} where 1 = 1 and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql new file mode 100644 index 0000000000000..dbc21ce7b3ca8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('1_prefix_startwith_number_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('1_prefix_startwith_number_stg') }} +-- 1_prefix_startwith_number from {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} +{% else %} +select * from {{ ref('1_prefix_startwith_number_stg') }} +{% endif %} +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql index 69bff1d44aaa7..c387201c974c8 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql @@ -16,5 +16,5 @@ select from {{ ref('1_prefix_startwith_number_ab2') }} tmp -- 1_prefix_startwith_number where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql index 2de38510bde83..32d70c680aa9d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql @@ -21,5 +21,5 @@ from {{ ref('dedup_cdc_excluded_scd') }} -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} where 1 = 1 and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql new file mode 100644 index 0000000000000..865791db29978 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('dedup_cdc_excluded_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('dedup_cdc_excluded_stg') }} +-- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} +{% else %} +select * from {{ ref('dedup_cdc_excluded_stg') }} +{% endif %} +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql index 4b95e21267dbe..b0cd4bf7cb134 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql @@ -18,5 +18,5 @@ select from {{ ref('dedup_cdc_excluded_ab2') }} tmp -- dedup_cdc_excluded where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql index 1ee7d74d027e2..42f7540dc6b9f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -24,5 +24,5 @@ from {{ ref('dedup_exchange_rate_scd') }} -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} where 1 = 1 and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..b8dc781f16dc7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('dedup_exchange_rate_stg') }} +-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} +{% else %} +select * from {{ ref('dedup_exchange_rate_stg') }} +{% endif %} +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql index 62126d7b7c4e4..f892feed3fe7d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql @@ -21,5 +21,5 @@ select from {{ ref('dedup_exchange_rate_ab2') }} tmp -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql index 9aa1f765c0c8f..3451ce406b4d2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql @@ -23,5 +23,5 @@ from {{ ref('multiple_column_names_conflicts_scd') }} -- multiple_column_names_conflicts from {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} where 1 = 1 and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql new file mode 100644 index 0000000000000..0fe0693e9d1d4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('multiple_column_names_conflicts_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('multiple_column_names_conflicts_stg') }} +-- multiple_column_names_conflicts from {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} +{% else %} +select * from {{ ref('multiple_column_names_conflicts_stg') }} +{% endif %} +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql index 85ac753575979..c549b49128a62 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql @@ -20,5 +20,5 @@ select from {{ ref('multiple_column_names_conflicts_ab2') }} tmp -- multiple_column_names_conflicts where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql index 1d95d8a503384..57ddb1908b9d6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql @@ -22,5 +22,5 @@ from {{ ref('pos_dedup_cdcx_scd') }} -- pos_dedup_cdcx from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} where 1 = 1 and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql new file mode 100644 index 0000000000000..49df39d8e0ff9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('pos_dedup_cdcx_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('pos_dedup_cdcx_stg') }} +-- pos_dedup_cdcx from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} +{% else %} +select * from {{ ref('pos_dedup_cdcx_stg') }} +{% endif %} +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql index 8fdd8e7d07f0b..692867ceaf4ed 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql @@ -19,5 +19,5 @@ select from {{ ref('pos_dedup_cdcx_ab2') }} tmp -- pos_dedup_cdcx where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index ca5093eb3e17e..603af9d4f80c3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -18,5 +18,5 @@ from {{ ref('renamed_dedup_cdc_excluded_scd') }} -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} where 1 = 1 and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql new file mode 100644 index 0000000000000..4e251b38b7383 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('renamed_dedup_cdc_excluded_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('renamed_dedup_cdc_excluded_stg') }} +-- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} +{% else %} +select * from {{ ref('renamed_dedup_cdc_excluded_stg') }} +{% endif %} +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql index be9bbfcd86758..96371bb4931a9 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql @@ -15,5 +15,5 @@ select from {{ ref('renamed_dedup_cdc_excluded_ab2') }} tmp -- renamed_dedup_cdc_excluded where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql index 23e1bb70c5879..8dd3aff00d2cd 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -21,5 +21,5 @@ select from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql index b43312b67ebf1..b5e700b36aa6a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -21,5 +21,5 @@ select from {{ ref('dedup_exchange_rate_ab1') }} -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql index 590e1e755b5c4..dfa39c2a71eb7 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql @@ -18,5 +18,5 @@ select from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} as table_alias -- renamed_dedup_cdc_excluded where 1 = 1 -{{ 
incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql index 0718ac05fcbf9..72f80140e0076 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql @@ -18,5 +18,5 @@ select from {{ ref('renamed_dedup_cdc_excluded_ab1') }} -- renamed_dedup_cdc_excluded where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index bf5adb993db9e..eca96683c58b2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -2,21 +2,62 @@ indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["delete from _airbyte_test_normalization.dedup_exchange_rate_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_exchange_rate_stg)"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. 
+ -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key + from {{ ref('dedup_exchange_rate_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) + select modified_ids._airbyte_unique_key from scd_active_rows + right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from {{ ref('dedup_exchange_rate_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('dedup_exchange_rate_scd_new_data') }})","delete from _airbyte_test_normalization.dedup_exchange_rate_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_exchange_rate_stg)"], tags = [ "top-level" ] ) }} --- depends_on: ref('dedup_exchange_rate_stg') +-- depends on: {{ ref('dedup_exchange_rate_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key 
from rows that are new select distinct @@ -25,11 +66,11 @@ new_data_ids as ( 'currency', 'nzd', ]) }} as _airbyte_unique_key - from new_data + from {{ ref('dedup_exchange_rate_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ ref('dedup_exchange_rate_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -43,7 +84,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from {{ ref('dedup_exchange_rate_scd_new_data') }} union all select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index c0bcd34d32027..3a78255a1d42f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -2,32 +2,71 @@ indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 
'btree'}], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["delete from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg)"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='renamed_dedup_cdc_excluded' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). 
+ delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) + select modified_ids._airbyte_unique_key from scd_active_rows + right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }})","delete from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg)"], tags = [ "top-level" ] ) }} --- depends_on: ref('renamed_dedup_cdc_excluded_stg') +-- depends on: {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('renamed_dedup_cdc_excluded_stg') }} - -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ 
dbt_utils.surrogate_key([ adapter.quote('id'), ]) }} as _airbyte_unique_key - from new_data + from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -41,7 +80,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from new_data + select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} union all select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql index 8529ede3dcfac..3e51ad4d72565 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -24,5 +24,5 @@ from {{ ref('dedup_exchange_rate_scd') }} -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} where 1 = 1 and _airbyte_active_row 
= 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..b8dc781f16dc7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('dedup_exchange_rate_stg') }} +-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} +{% else %} +select * from {{ ref('dedup_exchange_rate_stg') }} +{% endif %} +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql index 
a4c1e8816f8ae..35c866ac4d364 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql @@ -21,5 +21,5 @@ select from {{ ref('dedup_exchange_rate_ab2') }} tmp -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index 80ff3fc2138ca..672118dcf045c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -21,5 +21,5 @@ from {{ ref('renamed_dedup_cdc_excluded_scd') }} -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} where 1 = 1 and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql new file mode 100644 index 0000000000000..4e251b38b7383 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('renamed_dedup_cdc_excluded_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('renamed_dedup_cdc_excluded_stg') }} +-- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} +{% else %} +select * from {{ ref('renamed_dedup_cdc_excluded_stg') }} +{% endif %} +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql index 86d0e6f4451d0..b2d5002b934a3 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql @@ -18,5 +18,5 @@ select from {{ ref('renamed_dedup_cdc_excluded_ab2') }} tmp -- renamed_dedup_cdc_excluded where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql new file mode 100644 index 0000000000000..b246701c2e123 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."1_prefix_startwith_number_scd_new_data" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "1_prefix_startwith_number_scd_new_data__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."1_prefix_startwith_number_scd_new_data" ("_airbyte_1_prefix_startwith_number_hashid", "id", "date", "text", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_1_prefix_startwith_number_hashid", "id", "date", "text", "_airbyte_ab_id", "_airbyte_emitted_at", 
"_airbyte_normalized_at" + from "1_prefix_startwith_number_scd_new_data__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql new file mode 100644 index 0000000000000..7006d949e5f5b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."dedup_cdc_excluded_scd_new_data" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "dedup_cdc_excluded_scd_new_data__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."dedup_cdc_excluded_scd_new_data" ("_airbyte_dedup_cdc_excluded_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_dedup_cdc_excluded_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "dedup_cdc_excluded_scd_new_data__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..18d62497dea4a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "dedup_exchange_rate_scd_new_data__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data" ("_airbyte_dedup_exchange_rate_hashid", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_dedup_exchange_rate_hashid", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "dedup_exchange_rate_scd_new_data__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql new file mode 100644 index 0000000000000..914878633d4d8 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_scd_new_data" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "multiple_column_names_conflicts_scd_new_da__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_scd_new_data" ("_airbyte_multiple_co__ames_conflicts_hashid", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_multiple_co__ames_conflicts_hashid", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "multiple_column_names_conflicts_scd_new_da__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql new file mode 100644 index 0000000000000..da0f97d9aab3a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."pos_dedup_cdcx_scd_new_data" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "pos_dedup_cdcx_scd_new_data__dbt_tmp" + ); + + + insert into 
"postgres"._airbyte_test_normalization."pos_dedup_cdcx_scd_new_data" ("_airbyte_pos_dedup_cdcx_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_ab_cdc_log_pos", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_pos_dedup_cdcx_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_ab_cdc_log_pos", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "pos_dedup_cdcx_scd_new_data__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql new file mode 100644 index 0000000000000..facf4989f9084 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_scd_new_data" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "renamed_dedup_cdc_excluded_scd_new_data__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_scd_new_data" ("_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "renamed_dedup_cdc_excluded_scd_new_data__dbt_tmp" + ) + \ No newline at end 
of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql index 7d795f97e67e8..2a24e704fda26 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -31,9 +31,7 @@ select cast("id" as bigint ) as "id", - cast(currency as - varchar -) as currency, + cast(currency as text) as currency, cast(nullif("date", '') as date ) as "date", @@ -43,18 +41,14 @@ select cast("HKD@spéçiäl & characters" as float ) as "HKD@spéçiäl & characters", - cast(hkd_special___characters as - varchar -) as hkd_special___characters, + cast(hkd_special___characters as text) as hkd_special___characters, cast(nzd as float ) as nzd, cast(usd as float ) as usd, - cast("column`_'with""_quotes" as - varchar -) as "column`_'with""_quotes", + cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -66,27 +60,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__exchange_rate_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast(currency as - varchar -), '') || '-' || coalesce(cast("date" as - varchar -), '') || '-' || coalesce(cast(timestamp_col as - varchar -), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as - varchar -), '') || '-' || 
coalesce(cast(hkd_special___characters as - varchar -), '') || '-' || coalesce(cast(nzd as - varchar -), '') || '-' || coalesce(cast(usd as - varchar -), '') || '-' || coalesce(cast("column`_'with""_quotes" as - varchar -), '') as - varchar -)) as _airbyte_exchange_rate_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_exchange_rate_hashid, tmp.* from __dbt__cte__exchange_rate_ab2 tmp -- exchange_rate diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..c84aad95317f1 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "dedup_exchange_rate_scd_new_data__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data" ("_airbyte_dedup_exchange_rate_hashid", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", 
"_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "new_column", "id", "usd") + ( + select "_airbyte_dedup_exchange_rate_hashid", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "new_column", "id", "usd" + from "dedup_exchange_rate_scd_new_data__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql new file mode 100644 index 0000000000000..5f63dc0467c5d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_scd_new_data" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "renamed_dedup_cdc_excluded_scd_new_data__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_scd_new_data" ("_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at") + ( + select "_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at" + from "renamed_dedup_cdc_excluded_scd_new_data__dbt_tmp" + ) + \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql index d7f0d50be215f..155df4698f2d1 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -31,9 +31,7 @@ select cast("id" as float ) as "id", - cast(currency as - varchar -) as currency, + cast(currency as text) as currency, cast(new_column as float ) as new_column, @@ -52,9 +50,7 @@ select cast(usd as float ) as usd, - cast("column`_'with""_quotes" as - varchar -) as "column`_'with""_quotes", + cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -66,27 +62,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__exchange_rate_ab2 select - md5(cast(coalesce(cast("id" as - varchar -), '') || '-' || coalesce(cast(currency as - varchar -), '') || '-' || coalesce(cast(new_column as - varchar -), '') || '-' || coalesce(cast("date" as - varchar -), '') || '-' || coalesce(cast(timestamp_col as - varchar -), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as - varchar -), '') || '-' || coalesce(cast(nzd as - varchar -), '') || '-' || coalesce(cast(usd as - varchar -), '') || '-' || coalesce(cast("column`_'with""_quotes" as - varchar -), '') as - varchar -)) as _airbyte_exchange_rate_hashid, + md5(cast(coalesce(cast("id" as text), 
'') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(new_column as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_exchange_rate_hashid, tmp.* from __dbt__cte__exchange_rate_ab2 tmp -- exchange_rate diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/dbt_project.yml index 7631ef356dc92..44d3103d221e9 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/dbt_project.yml @@ -1,46 +1,32 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" +name: airbyte_utils version: "1.0" config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! 
-model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - +profile: normalize +model-paths: + - models +docs-paths: + - docs +analysis-paths: + - analysis +test-paths: + - tests +seed-paths: + - data +macro-paths: + - macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: + - build + - dbt_modules quoting: database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted schema: false identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! 
models: + +transient: false + +pre-hook: SET enable_case_sensitive_identifier to TRUE airbyte_utils: +materialized: table generated: @@ -57,7 +43,79 @@ models: airbyte_views: +tags: airbyte_internal_views +materialized: view - dispatch: - macro_namespace: dbt_utils - search_order: ["airbyte_utils", "dbt_utils"] + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + nested_stream_with_complex_columns_resulting_into_long_names_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_stg: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_scd: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names_ab1: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names_ab2: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names_ab3: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names: 
test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + some_stream_that_was_empty_ab1: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_ab2: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_stg: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_scd_new_data: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_scd: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty: test_normalization._airbyte_raw_some_stream_that_was_empty + simple_stream_with_namespace_resulting_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_namespace_resulting_into_long_names_ab2: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_namespace_resulting_into_long_names_ab3: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_namespace_resulting_into_long_names: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_scalar_ab1: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab2: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab3: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_array_ab1: 
test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array_ab2: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array_ab3: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array: test_normalization._airbyte_raw_conflict_stream_array + unnest_alias_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias: test_normalization._airbyte_raw_unnest_alias + nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + conflict_stream_name_conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + unnest_alias_children_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children: test_normalization._airbyte_raw_unnest_alias + 
nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_data: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + conflict_stream_name_conflict_stream_name_conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + unnest_alias_children_owner_ab1: 
test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes: test_normalization._airbyte_raw_unnest_alias diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index caeba18c24776..2a961faaec733 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -9,7 +9,7 @@ as ( --- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') +-- depends on: "integrationtests"._airbyte_test_normalization."nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data" with input_data as ( 
@@ -21,7 +21,7 @@ input_data as ( scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select - md5(cast(coalesce(cast(id as varchar), '') as varchar)) as _airbyte_unique_key, + md5(cast(coalesce(cast(id as text), '') as text)) as _airbyte_unique_key, id, date, "partition", @@ -56,7 +56,7 @@ dedup_data as ( _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as varchar), '') || '-' || coalesce(cast(_airbyte_start_at as varchar), '') || '-' || coalesce(cast(_airbyte_emitted_at as varchar), '') as varchar)) as _airbyte_unique_key_scd, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, scd_data.* from scd_data ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql index 25a5f72a235c4..8348fdeb81320 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql @@ 
-15,8 +15,8 @@ with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_pa -- depends_on: "integrationtests".test_normalization."nested_stream_with_complex_columns_resulting_into_long_names_scd" select _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, - json_extract_path_text("partition", 'double_array_data', true) as double_array_data, - json_extract_path_text("partition", 'DATA', true) as data, + "partition"."double_array_data" as double_array_data, + "partition"."DATA" as data, _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at @@ -45,7 +45,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2 select - md5(cast(coalesce(cast(_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid as varchar), '') || '-' || coalesce(cast(double_array_data as varchar), '') || '-' || coalesce(cast(data as varchar), '') as varchar)) as _airbyte_partition_hashid, + md5(cast(coalesce(cast(_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid as text), '') || '-' || coalesce(cast(json_serialize(double_array_data) as text), '') || '-' || coalesce(cast(json_serialize(data) as text), '') as text)) as _airbyte_partition_hashid, tmp.* from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2 tmp -- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql index 0cd481382f105..3f46c9e431c0b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql @@ -13,53 +13,16 @@ with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_pa -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -- depends_on: "integrationtests".test_normalization."nested_stream_with_complex_columns_resulting_into_long_names_partition" -with numbers as ( - - - - - with p as ( - select 0 as generated_number union all select 1 - ), unioned as ( - - select - - - p0.generated_number * power(2, 0) - - - + 1 - as generated_number - - from - - - p as p0 - - - - ) - - select * - from unioned - where generated_number <= 1 - order by generated_number - -), -joined as ( - select - _airbyte_partition_hashid as _airbyte_hashid, - json_extract_array_element_text(data, numbers.generated_number::int - 1, true) as _airbyte_nested_data - from "integrationtests".test_normalization."nested_stream_with_complex_columns_resulting_into_long_names_partition" - cross join numbers - -- only generate the number of records in the cross join that corresponds - -- to the number of items in 
"integrationtests".test_normalization."nested_stream_with_complex_columns_resulting_into_long_names_partition".data - where numbers.generated_number <= json_array_length(data, true) -) + with joined as ( + select + table_alias._airbyte_partition_hashid as _airbyte_hashid, + _airbyte_nested_data + from "integrationtests".test_normalization."nested_stream_with_complex_columns_resulting_into_long_names_partition" as table_alias, table_alias.data as _airbyte_nested_data + ) select _airbyte_partition_hashid, - case when json_extract_path_text(_airbyte_nested_data, 'currency', true) != '' then json_extract_path_text(_airbyte_nested_data, 'currency', true) end as currency, + case when _airbyte_nested_data."currency" != '' then _airbyte_nested_data."currency" end as currency, _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at @@ -75,7 +38,7 @@ and data is not null -- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1 select _airbyte_partition_hashid, - cast(currency as varchar) as currency, + cast(currency as text) as currency, _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at @@ -88,7 +51,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab2 select - md5(cast(coalesce(cast(_airbyte_partition_hashid as varchar), '') || '-' || coalesce(cast(currency as varchar), '') as varchar)) as _airbyte_data_hashid, + md5(cast(coalesce(cast(_airbyte_partition_hashid as text), '') || '-' || coalesce(cast(currency as text), '') as text)) as _airbyte_data_hashid, tmp.* from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab2 tmp -- data at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql index 0ef9e77bb055f..84fbebb03b50e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql @@ -13,53 +13,16 @@ with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_pa -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -- depends_on: "integrationtests".test_normalization."nested_stream_with_complex_columns_resulting_into_long_names_partition" -with numbers as ( - - - - - with p as ( - select 0 as generated_number union all select 1 - ), unioned as ( - - select - - - p0.generated_number * power(2, 0) - - - + 1 - as generated_number - - from - - - p as p0 - - - - ) - - select * - from unioned - where generated_number <= 2 - order by generated_number - -), -joined as ( - select - _airbyte_partition_hashid as _airbyte_hashid, - json_extract_array_element_text(double_array_data, numbers.generated_number::int - 1, true) as _airbyte_nested_data - from 
"integrationtests".test_normalization."nested_stream_with_complex_columns_resulting_into_long_names_partition" - cross join numbers - -- only generate the number of records in the cross join that corresponds - -- to the number of items in "integrationtests".test_normalization."nested_stream_with_complex_columns_resulting_into_long_names_partition".double_array_data - where numbers.generated_number <= json_array_length(double_array_data, true) -) + with joined as ( + select + table_alias._airbyte_partition_hashid as _airbyte_hashid, + _airbyte_nested_data + from "integrationtests".test_normalization."nested_stream_with_complex_columns_resulting_into_long_names_partition" as table_alias, table_alias.double_array_data as _airbyte_nested_data + ) select _airbyte_partition_hashid, - case when json_extract_path_text(_airbyte_nested_data, 'id', true) != '' then json_extract_path_text(_airbyte_nested_data, 'id', true) end as id, + case when _airbyte_nested_data."id" != '' then _airbyte_nested_data."id" end as id, _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at @@ -75,7 +38,7 @@ and double_array_data is not null -- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1 select _airbyte_partition_hashid, - cast(id as varchar) as id, + cast(id as text) as id, _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at @@ -88,7 +51,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2 select - md5(cast(coalesce(cast(_airbyte_partition_hashid as varchar), '') || '-' || coalesce(cast(id as varchar), '') as varchar)) as _airbyte_double_array_data_hashid, + md5(cast(coalesce(cast(_airbyte_partition_hashid as text), '') || '-' || coalesce(cast(id as text), '') as text)) as _airbyte_double_array_data_hashid, tmp.* from 
__dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2 tmp -- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql new file mode 100644 index 0000000000000..882316f6da066 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql @@ -0,0 +1,10 @@ + + + create view "integrationtests"._airbyte_test_normalization."nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data__dbt_tmp" as ( + +-- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') + +select * from "integrationtests"._airbyte_test_normalization."nested_stream_with_complex_columns_resulting_into_long_names_stg" + + + ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql index 4e3c132b78181..c6c4c7bb39732 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql @@ -16,5 +16,5 @@ select from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} as table_alias -- nested_stream_with_complex_columns_resulting_into_long_names where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql index 35b02dc8a020f..41c1f86de0c7a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql @@ -16,5 +16,5 @@ select from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_ab1') }} -- 
nested_stream_with_complex_columns_resulting_into_long_names where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql index 96a05e3dd72cb..7d9968fe71700 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql @@ -16,5 +16,5 @@ from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') -- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition where 1 = 1 and {{ adapter.quote('partition') }} is not null -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1.sql index a50b54ec3b7b8..b3f16a06f544c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1.sql @@ -17,5 +17,5 @@ from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partit {{ cross_join_unnest('partition', 'data') }} where 1 = 1 and data is not null -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql index ff6a32a5cf48d..7fe25a4c8eca6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql @@ -17,5 +17,5 @@ from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partit {{ cross_join_unnest('partition', 'double_array_data') }} where 1 = 1 and double_array_data is not null -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index e89e97f58fea2..bd7b368d44c9e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -2,32 +2,71 @@ sort = ["_airbyte_active_row", "_airbyte_unique_key_scd", "_airbyte_emitted_at"], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["drop view _airbyte_test_normalization.nested_stream_with_complex_columns_resulting_into_long_names_stg"], + post_hook = [" + {% + set final_table_relation = 
adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='nested_stream_with_complex_columns_resulting_into_long_names' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key + from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) + select modified_ids._airbyte_unique_key from scd_active_rows + right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }}","drop view _airbyte_test_normalization.nested_stream_with_complex_columns_resulting_into_long_names_stg"], tags = [ "top-level" ] ) }} --- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') +-- depends on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} - -- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ 'id', ]) }} as _airbyte_unique_key - from new_data + from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ 
ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -41,7 +80,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from new_data + select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} union all select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql index d6f1dc869ff51..7b608604bbeeb 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql @@ -19,5 +19,5 @@ from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') -- nested_stream_with_complex_columns_resulting_into_long_names from {{ 
source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} where 1 = 1 and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql index 030b87c4b7686..10a422df08a2e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql @@ -16,5 +16,5 @@ select from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3') }} -- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql index b13b4ba136285..4551ff86747c0 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql @@ -15,5 +15,5 @@ select from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab3') }} -- data at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql index bf4fd96d2192f..3f7bbf34e3e85 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql @@ -15,5 +15,5 @@ select from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3') }} -- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql new file mode 100644 index 0000000000000..8d6939ff2157e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql @@ -0,0 +1,20 @@ +{{ config( + sort = "_airbyte_emitted_at", + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ 
"top-level-intermediate" ] +) }} +-- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} +-- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} +{% else %} +select * from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} +{% endif %} +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql new file mode 100644 index 0000000000000..882316f6da066 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql @@ -0,0 +1,10 @@ + + + create view "integrationtests"._airbyte_test_normalization."nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data__dbt_tmp" as ( + +-- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') + +select * from "integrationtests"._airbyte_test_normalization."nested_stream_with_complex_columns_resulting_into_long_names_stg" + + + ) ; diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml index 88dde818dd4dd..6df743bc8c9f0 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml @@ -1,46 +1,32 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" +name: airbyte_utils version: "1.0" config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! 
-model-paths: ["modified_models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - +profile: normalize +model-paths: + - modified_models +docs-paths: + - docs +analysis-paths: + - analysis +test-paths: + - tests +seed-paths: + - data +macro-paths: + - macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: + - build + - dbt_modules quoting: database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted schema: false identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! 
models: + +transient: false + +pre-hook: SET enable_case_sensitive_identifier to TRUE airbyte_utils: +materialized: table generated: @@ -57,7 +43,27 @@ models: airbyte_views: +tags: airbyte_internal_views +materialized: view - dispatch: - macro_namespace: dbt_utils - search_order: ["airbyte_utils", "dbt_utils"] + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate + exchange_rate: test_normalization._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd_new_data: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_dbt_project.yml 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_dbt_project.yml index 7631ef356dc92..ef94da4100413 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_dbt_project.yml @@ -1,46 +1,32 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" +name: airbyte_utils version: "1.0" config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! 
-model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - +profile: normalize +model-paths: + - models +docs-paths: + - docs +analysis-paths: + - analysis +test-paths: + - tests +seed-paths: + - data +macro-paths: + - macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: + - build + - dbt_modules quoting: database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted schema: false identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! 
models: + +transient: false + +pre-hook: SET enable_case_sensitive_identifier to TRUE airbyte_utils: +materialized: table generated: @@ -57,7 +43,51 @@ models: airbyte_views: +tags: airbyte_internal_views +materialized: view - dispatch: - macro_namespace: dbt_utils - search_order: ["airbyte_utils", "dbt_utils"] + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate + exchange_rate: test_normalization._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd_new_data: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd_new_data: 
test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded + pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_scd_new_data: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx + 1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_scd_new_data: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number + multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd_new_data: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 14ff0512e8af4..d305fad65aa01 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -9,7 +9,7 @@ as ( --- depends_on: ref('dedup_exchange_rate_stg') +-- depends on: "integrationtests"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data" with input_data as ( @@ -21,7 +21,7 @@ input_data as ( scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select - md5(cast(coalesce(cast(id as varchar), '') || '-' || coalesce(cast(currency as varchar), '') || '-' || coalesce(cast(nzd as varchar), '') as varchar)) as _airbyte_unique_key, + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(nzd as text), '') as text)) as _airbyte_unique_key, id, currency, date, @@ -32,14 +32,14 @@ scd_data as ( usd, date as _airbyte_start_at, lag(date) over ( - partition by id, currency, cast(nzd as varchar) + partition by id, currency, cast(nzd as text) order by date is null asc, date desc, _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( - partition by id, currency, cast(nzd as varchar) + partition by id, currency, cast(nzd as text) order by date is 
null asc, date desc, @@ -61,7 +61,7 @@ dedup_data as ( _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, - md5(cast(coalesce(cast(_airbyte_unique_key as varchar), '') || '-' || coalesce(cast(_airbyte_start_at as varchar), '') || '-' || coalesce(cast(_airbyte_emitted_at as varchar), '') as varchar)) as _airbyte_unique_key_scd, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, scd_data.* from scd_data ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql index 0d13846cdfd44..f2537f70055b6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -13,15 +13,15 @@ with __dbt__cte__exchange_rate_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -- depends_on: "integrationtests".test_normalization._airbyte_raw_exchange_rate select - case when json_extract_path_text(_airbyte_data, 'id', true) != '' then json_extract_path_text(_airbyte_data, 'id', true) end as id, - case when json_extract_path_text(_airbyte_data, 'currency', true) != '' then json_extract_path_text(_airbyte_data, 'currency', true) end as currency, - case when 
json_extract_path_text(_airbyte_data, 'date', true) != '' then json_extract_path_text(_airbyte_data, 'date', true) end as date, - case when json_extract_path_text(_airbyte_data, 'timestamp_col', true) != '' then json_extract_path_text(_airbyte_data, 'timestamp_col', true) end as timestamp_col, - case when json_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters', true) != '' then json_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters', true) end as "hkd@spéçiäl & characters", - case when json_extract_path_text(_airbyte_data, 'HKD_special___characters', true) != '' then json_extract_path_text(_airbyte_data, 'HKD_special___characters', true) end as hkd_special___characters, - case when json_extract_path_text(_airbyte_data, 'NZD', true) != '' then json_extract_path_text(_airbyte_data, 'NZD', true) end as nzd, - case when json_extract_path_text(_airbyte_data, 'USD', true) != '' then json_extract_path_text(_airbyte_data, 'USD', true) end as usd, - case when json_extract_path_text(_airbyte_data, 'column`_''with"_quotes', true) != '' then json_extract_path_text(_airbyte_data, 'column`_''with"_quotes', true) end as "column`_'with""_quotes", + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, + case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, + case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, + case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", + case when _airbyte_data."HKD_special___characters" != '' then _airbyte_data."HKD_special___characters" end as hkd_special___characters, + case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, + case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, + case when _airbyte_data."column`_'with""_quotes" != '' 
then _airbyte_data."column`_'with""_quotes" end as "column`_'with""_quotes", _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at @@ -36,24 +36,24 @@ select cast(id as bigint ) as id, - cast(currency as varchar) as currency, - cast(nullif(date, '') as + cast(currency as text) as currency, + cast(nullif(date::varchar, '') as date ) as date, - cast(nullif(timestamp_col, '') as + cast(nullif(timestamp_col::varchar, '') as timestamp with time zone ) as timestamp_col, cast("hkd@spéçiäl & characters" as float ) as "hkd@spéçiäl & characters", - cast(hkd_special___characters as varchar) as hkd_special___characters, + cast(hkd_special___characters as text) as hkd_special___characters, cast(nzd as float ) as nzd, cast(usd as float ) as usd, - cast("column`_'with""_quotes" as varchar) as "column`_'with""_quotes", + cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at @@ -65,7 +65,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__exchange_rate_ab2 select - md5(cast(coalesce(cast(id as varchar), '') || '-' || coalesce(cast(currency as varchar), '') || '-' || coalesce(cast(date as varchar), '') || '-' || coalesce(cast(timestamp_col as varchar), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as varchar), '') || '-' || coalesce(cast(hkd_special___characters as varchar), '') || '-' || coalesce(cast(nzd as varchar), '') || '-' || coalesce(cast(usd as varchar), '') || '-' || coalesce(cast("column`_'with""_quotes" as varchar), '') as varchar)) as _airbyte_exchange_rate_hashid, + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || 
coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_exchange_rate_hashid, tmp.* from __dbt__cte__exchange_rate_ab2 tmp -- exchange_rate diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..200a7b1d3e0a6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,10 @@ + + + create view "integrationtests"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data__dbt_tmp" as ( + +-- depends_on: ref('dedup_exchange_rate_stg') + +select * from "integrationtests"._airbyte_test_normalization."dedup_exchange_rate_stg" + + + ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql index 2c02508e5b871..f20d8dbdc37be 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -7,14 +7,14 @@ with __dbt__cte__dedup_exchange_rate_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -- depends_on: "integrationtests".test_normalization._airbyte_raw_dedup_exchange_rate select - case when json_extract_path_text(_airbyte_data, 'id', true) != '' then json_extract_path_text(_airbyte_data, 'id', true) end as id, - case when json_extract_path_text(_airbyte_data, 'currency', true) != '' then json_extract_path_text(_airbyte_data, 'currency', true) end as currency, - case when json_extract_path_text(_airbyte_data, 'date', true) != '' then json_extract_path_text(_airbyte_data, 'date', true) end as date, - case when json_extract_path_text(_airbyte_data, 'timestamp_col', true) != '' then json_extract_path_text(_airbyte_data, 'timestamp_col', true) end as timestamp_col, - case when json_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters', true) != '' then json_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters', true) end as "hkd@spéçiäl & characters", - case when json_extract_path_text(_airbyte_data, 'HKD_special___characters', true) != '' then json_extract_path_text(_airbyte_data, 'HKD_special___characters', true) end as hkd_special___characters, - case when json_extract_path_text(_airbyte_data, 'NZD', true) != '' then json_extract_path_text(_airbyte_data, 'NZD', true) end as nzd, - case when json_extract_path_text(_airbyte_data, 'USD', true) != '' then json_extract_path_text(_airbyte_data, 'USD', true) end as usd, + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, + case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, + case 
when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, + case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", + case when _airbyte_data."HKD_special___characters" != '' then _airbyte_data."HKD_special___characters" end as hkd_special___characters, + case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, + case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at @@ -30,17 +30,17 @@ select cast(id as bigint ) as id, - cast(currency as varchar) as currency, - cast(nullif(date, '') as + cast(currency as text) as currency, + cast(nullif(date::varchar, '') as date ) as date, - cast(nullif(timestamp_col, '') as + cast(nullif(timestamp_col::varchar, '') as timestamp with time zone ) as timestamp_col, cast("hkd@spéçiäl & characters" as float ) as "hkd@spéçiäl & characters", - cast(hkd_special___characters as varchar) as hkd_special___characters, + cast(hkd_special___characters as text) as hkd_special___characters, cast(nzd as float ) as nzd, @@ -57,7 +57,7 @@ where 1 = 1 )-- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__dedup_exchange_rate_ab2 select - md5(cast(coalesce(cast(id as varchar), '') || '-' || coalesce(cast(currency as varchar), '') || '-' || coalesce(cast(date as varchar), '') || '-' || coalesce(cast(timestamp_col as varchar), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as varchar), '') || '-' || coalesce(cast(hkd_special___characters as varchar), '') || '-' || coalesce(cast(nzd as varchar), '') || '-' || coalesce(cast(usd as varchar), '') as varchar)) as _airbyte_dedup_exchange_rate_hashid, + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') 
|| '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') as text)) as _airbyte_dedup_exchange_rate_hashid, tmp.* from __dbt__cte__dedup_exchange_rate_ab2 tmp -- dedup_exchange_rate diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql index 0777ba0c53932..37786e8c52561 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql @@ -7,13 +7,13 @@ with __dbt__cte__multiple_column_names_conflicts_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -- depends_on: "integrationtests".test_normalization._airbyte_raw_multiple_column_names_conflicts select - case when json_extract_path_text(_airbyte_data, 'id', true) != '' then json_extract_path_text(_airbyte_data, 'id', true) end as id, - case when json_extract_path_text(_airbyte_data, 'User Id', true) != '' then json_extract_path_text(_airbyte_data, 'User Id', true) end as "user id", - case when json_extract_path_text(_airbyte_data, 'user_id', true) != '' then json_extract_path_text(_airbyte_data, 'user_id', true) end as user_id, - case when 
json_extract_path_text(_airbyte_data, 'User id', true) != '' then json_extract_path_text(_airbyte_data, 'User id', true) end as "user id_1", - case when json_extract_path_text(_airbyte_data, 'user id', true) != '' then json_extract_path_text(_airbyte_data, 'user id', true) end as "user id_2", - case when json_extract_path_text(_airbyte_data, 'User@Id', true) != '' then json_extract_path_text(_airbyte_data, 'User@Id', true) end as "user@id", - case when json_extract_path_text(_airbyte_data, 'UserId', true) != '' then json_extract_path_text(_airbyte_data, 'UserId', true) end as userid, + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."User Id" != '' then _airbyte_data."User Id" end as "user id", + case when _airbyte_data."user_id" != '' then _airbyte_data."user_id" end as user_id, + case when _airbyte_data."User id" != '' then _airbyte_data."User id" end as "user id_1", + case when _airbyte_data."user id" != '' then _airbyte_data."user id" end as "user id_2", + case when _airbyte_data."User@Id" != '' then _airbyte_data."User@Id" end as "user@id", + case when _airbyte_data."UserId" != '' then _airbyte_data."UserId" end as userid, _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at @@ -29,7 +29,7 @@ select cast(id as bigint ) as id, - cast("user id" as varchar) as "user id", + cast("user id" as text) as "user id", cast(user_id as float ) as user_id, @@ -39,7 +39,7 @@ select cast("user id_2" as float ) as "user id_2", - cast("user@id" as varchar) as "user@id", + cast("user@id" as text) as "user@id", cast(userid as float ) as userid, @@ -53,7 +53,7 @@ where 1 = 1 )-- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2 select - md5(cast(coalesce(cast(id as varchar), '') || '-' || coalesce(cast("user id" as varchar), '') || '-' || coalesce(cast(user_id as varchar), '') || '-' || coalesce(cast("user id_1" as varchar), '') || 
'-' || coalesce(cast("user id_2" as varchar), '') || '-' || coalesce(cast("user@id" as varchar), '') || '-' || coalesce(cast(userid as varchar), '') as varchar)) as _airbyte_multiple_column_names_conflicts_hashid, + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast("user id" as text), '') || '-' || coalesce(cast(user_id as text), '') || '-' || coalesce(cast("user id_1" as text), '') || '-' || coalesce(cast("user id_2" as text), '') || '-' || coalesce(cast("user@id" as text), '') || '-' || coalesce(cast(userid as text), '') as text)) as _airbyte_multiple_column_names_conflicts_hashid, tmp.* from __dbt__cte__multiple_column_names_conflicts_ab2 tmp -- multiple_column_names_conflicts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql index b737fc7a29984..17c4a88a2059e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -21,5 +21,5 @@ select from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql index 97341fcfad770..796d4205f5ae7 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -21,5 +21,5 @@ select from {{ ref('dedup_exchange_rate_ab1') }} -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 3cb089de2de1a..8da0bdee4f901 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -2,21 +2,62 @@ sort = ["_airbyte_active_row", 
"_airbyte_unique_key_scd", "_airbyte_emitted_at"], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). 
+ delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key + from {{ ref('dedup_exchange_rate_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) + select modified_ids._airbyte_unique_key from scd_active_rows + right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view {{ ref('dedup_exchange_rate_scd_new_data') }}","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends_on: ref('dedup_exchange_rate_stg') +-- depends on: {{ ref('dedup_exchange_rate_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct @@ -25,11 +66,11 @@ new_data_ids as ( 'currency', 'nzd', ]) }} as _airbyte_unique_key - from new_data + from {{ ref('dedup_exchange_rate_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ 
ref('dedup_exchange_rate_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -43,7 +84,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from {{ ref('dedup_exchange_rate_scd_new_data') }} union all select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql index 4159603bae9ea..d8b57a81b7cf1 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -24,5 +24,5 @@ from {{ ref('dedup_exchange_rate_scd') }} -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} where 1 = 1 and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..02049154e8298 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,20 @@ +{{ config( + sort = "_airbyte_emitted_at", + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('dedup_exchange_rate_stg') }} +-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} +{% else %} +select * from {{ ref('dedup_exchange_rate_stg') }} +{% endif %} +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql index e0f6b9699b7d6..8de81a6690f80 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ 
-21,5 +21,5 @@ select from {{ ref('dedup_exchange_rate_ab2') }} tmp -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql index ab09ca51f11c2..eca4c17d59fba 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -21,5 +21,5 @@ select from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql index 9b24d65d796e8..bf26dc2829f01 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -21,5 +21,5 @@ select from {{ ref('dedup_exchange_rate_ab1') }} -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 0145a94818b0a..1b51c0ba1d938 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -2,21 +2,62 @@ sort = ["_airbyte_active_row", "_airbyte_unique_key_scd", "_airbyte_emitted_at"], unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' + ) + %} + {# + If the final table 
doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key + from {{ ref('dedup_exchange_rate_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) + select modified_ids._airbyte_unique_key from scd_active_rows + right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view {{ ref('dedup_exchange_rate_scd_new_data') }}","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends_on: ref('dedup_exchange_rate_stg') +-- depends on: {{ ref('dedup_exchange_rate_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct @@ -25,11 +66,11 @@ new_data_ids as ( 'currency', 'nzd', ]) }} as _airbyte_unique_key - from new_data + from {{ ref('dedup_exchange_rate_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ ref('dedup_exchange_rate_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -43,7 +84,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + select {{ 
dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from {{ ref('dedup_exchange_rate_scd_new_data') }} union all select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql index c3b47dab239be..421177e811796 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -24,5 +24,5 @@ from {{ ref('dedup_exchange_rate_scd') }} -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} where 1 = 1 and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..02049154e8298 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,20 @@ +{{ config( + sort = "_airbyte_emitted_at", + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('dedup_exchange_rate_stg') }} +-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} +{% else %} +select * from {{ ref('dedup_exchange_rate_stg') }} +{% endif %} +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql index 1df86fb5598c9..59153246fdb5b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -21,5 +21,5 @@ select from {{ ref('dedup_exchange_rate_ab2') }} tmp -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql index 0d13846cdfd44..f2537f70055b6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -13,15 +13,15 @@ with __dbt__cte__exchange_rate_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -- depends_on: "integrationtests".test_normalization._airbyte_raw_exchange_rate select - case when json_extract_path_text(_airbyte_data, 'id', true) != '' then json_extract_path_text(_airbyte_data, 'id', true) end as id, - case when json_extract_path_text(_airbyte_data, 'currency', true) != '' then json_extract_path_text(_airbyte_data, 'currency', true) end as currency, - case when json_extract_path_text(_airbyte_data, 'date', true) != '' then json_extract_path_text(_airbyte_data, 'date', true) end as date, - case when json_extract_path_text(_airbyte_data, 'timestamp_col', true) != '' then json_extract_path_text(_airbyte_data, 'timestamp_col', true) end as timestamp_col, - case when json_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters', true) != '' then json_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters', true) end as "hkd@spéçiäl & characters", - case when json_extract_path_text(_airbyte_data, 'HKD_special___characters', true) != '' then 
json_extract_path_text(_airbyte_data, 'HKD_special___characters', true) end as hkd_special___characters, - case when json_extract_path_text(_airbyte_data, 'NZD', true) != '' then json_extract_path_text(_airbyte_data, 'NZD', true) end as nzd, - case when json_extract_path_text(_airbyte_data, 'USD', true) != '' then json_extract_path_text(_airbyte_data, 'USD', true) end as usd, - case when json_extract_path_text(_airbyte_data, 'column`_''with"_quotes', true) != '' then json_extract_path_text(_airbyte_data, 'column`_''with"_quotes', true) end as "column`_'with""_quotes", + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, + case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, + case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, + case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", + case when _airbyte_data."HKD_special___characters" != '' then _airbyte_data."HKD_special___characters" end as hkd_special___characters, + case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, + case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, + case when _airbyte_data."column`_'with""_quotes" != '' then _airbyte_data."column`_'with""_quotes" end as "column`_'with""_quotes", _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at @@ -36,24 +36,24 @@ select cast(id as bigint ) as id, - cast(currency as varchar) as currency, - cast(nullif(date, '') as + cast(currency as text) as currency, + cast(nullif(date::varchar, '') as date ) as date, - cast(nullif(timestamp_col, '') as + cast(nullif(timestamp_col::varchar, '') as timestamp with time zone ) as timestamp_col, cast("hkd@spéçiäl & characters" as float ) as "hkd@spéçiäl & characters", - cast(hkd_special___characters 
as varchar) as hkd_special___characters, + cast(hkd_special___characters as text) as hkd_special___characters, cast(nzd as float ) as nzd, cast(usd as float ) as usd, - cast("column`_'with""_quotes" as varchar) as "column`_'with""_quotes", + cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at @@ -65,7 +65,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__exchange_rate_ab2 select - md5(cast(coalesce(cast(id as varchar), '') || '-' || coalesce(cast(currency as varchar), '') || '-' || coalesce(cast(date as varchar), '') || '-' || coalesce(cast(timestamp_col as varchar), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as varchar), '') || '-' || coalesce(cast(hkd_special___characters as varchar), '') || '-' || coalesce(cast(nzd as varchar), '') || '-' || coalesce(cast(usd as varchar), '') || '-' || coalesce(cast("column`_'with""_quotes" as varchar), '') as varchar)) as _airbyte_exchange_rate_hashid, + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_exchange_rate_hashid, tmp.* from __dbt__cte__exchange_rate_ab2 tmp -- exchange_rate diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..200a7b1d3e0a6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,10 @@ + + + create view "integrationtests"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data__dbt_tmp" as ( + +-- depends_on: ref('dedup_exchange_rate_stg') + +select * from "integrationtests"._airbyte_test_normalization."dedup_exchange_rate_stg" + + + ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql index 2c02508e5b871..f20d8dbdc37be 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -7,14 +7,14 @@ with __dbt__cte__dedup_exchange_rate_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -- depends_on: "integrationtests".test_normalization._airbyte_raw_dedup_exchange_rate select - case when json_extract_path_text(_airbyte_data, 'id', true) != '' then 
json_extract_path_text(_airbyte_data, 'id', true) end as id, - case when json_extract_path_text(_airbyte_data, 'currency', true) != '' then json_extract_path_text(_airbyte_data, 'currency', true) end as currency, - case when json_extract_path_text(_airbyte_data, 'date', true) != '' then json_extract_path_text(_airbyte_data, 'date', true) end as date, - case when json_extract_path_text(_airbyte_data, 'timestamp_col', true) != '' then json_extract_path_text(_airbyte_data, 'timestamp_col', true) end as timestamp_col, - case when json_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters', true) != '' then json_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters', true) end as "hkd@spéçiäl & characters", - case when json_extract_path_text(_airbyte_data, 'HKD_special___characters', true) != '' then json_extract_path_text(_airbyte_data, 'HKD_special___characters', true) end as hkd_special___characters, - case when json_extract_path_text(_airbyte_data, 'NZD', true) != '' then json_extract_path_text(_airbyte_data, 'NZD', true) end as nzd, - case when json_extract_path_text(_airbyte_data, 'USD', true) != '' then json_extract_path_text(_airbyte_data, 'USD', true) end as usd, + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, + case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, + case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, + case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", + case when _airbyte_data."HKD_special___characters" != '' then _airbyte_data."HKD_special___characters" end as hkd_special___characters, + case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, + case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, _airbyte_ab_id, 
_airbyte_emitted_at, getdate() as _airbyte_normalized_at @@ -30,17 +30,17 @@ select cast(id as bigint ) as id, - cast(currency as varchar) as currency, - cast(nullif(date, '') as + cast(currency as text) as currency, + cast(nullif(date::varchar, '') as date ) as date, - cast(nullif(timestamp_col, '') as + cast(nullif(timestamp_col::varchar, '') as timestamp with time zone ) as timestamp_col, cast("hkd@spéçiäl & characters" as float ) as "hkd@spéçiäl & characters", - cast(hkd_special___characters as varchar) as hkd_special___characters, + cast(hkd_special___characters as text) as hkd_special___characters, cast(nzd as float ) as nzd, @@ -57,7 +57,7 @@ where 1 = 1 )-- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__dedup_exchange_rate_ab2 select - md5(cast(coalesce(cast(id as varchar), '') || '-' || coalesce(cast(currency as varchar), '') || '-' || coalesce(cast(date as varchar), '') || '-' || coalesce(cast(timestamp_col as varchar), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as varchar), '') || '-' || coalesce(cast(hkd_special___characters as varchar), '') || '-' || coalesce(cast(nzd as varchar), '') || '-' || coalesce(cast(usd as varchar), '') as varchar)) as _airbyte_dedup_exchange_rate_hashid, + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') as text)) as _airbyte_dedup_exchange_rate_hashid, tmp.* from __dbt__cte__dedup_exchange_rate_ab2 tmp -- dedup_exchange_rate diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql index 5fb76893d7933..1fa0ba1e56c49 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -13,15 +13,15 @@ with __dbt__cte__exchange_rate_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -- depends_on: "integrationtests".test_normalization._airbyte_raw_exchange_rate select - case when json_extract_path_text(_airbyte_data, 'id', true) != '' then json_extract_path_text(_airbyte_data, 'id', true) end as id, - case when json_extract_path_text(_airbyte_data, 'currency', true) != '' then json_extract_path_text(_airbyte_data, 'currency', true) end as currency, - case when json_extract_path_text(_airbyte_data, 'new_column', true) != '' then json_extract_path_text(_airbyte_data, 'new_column', true) end as new_column, - case when json_extract_path_text(_airbyte_data, 'date', true) != '' then json_extract_path_text(_airbyte_data, 'date', true) end as date, - case when json_extract_path_text(_airbyte_data, 'timestamp_col', true) != '' then json_extract_path_text(_airbyte_data, 'timestamp_col', true) end as timestamp_col, - case when json_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters', true) != '' then json_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters', true) end as "hkd@spéçiäl & characters", - case when json_extract_path_text(_airbyte_data, 'NZD', true) != '' then json_extract_path_text(_airbyte_data, 'NZD', true) end as nzd, - case when 
json_extract_path_text(_airbyte_data, 'USD', true) != '' then json_extract_path_text(_airbyte_data, 'USD', true) end as usd, - case when json_extract_path_text(_airbyte_data, 'column`_''with"_quotes', true) != '' then json_extract_path_text(_airbyte_data, 'column`_''with"_quotes', true) end as "column`_'with""_quotes", + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, + case when _airbyte_data."new_column" != '' then _airbyte_data."new_column" end as new_column, + case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, + case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, + case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", + case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, + case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, + case when _airbyte_data."column`_'with""_quotes" != '' then _airbyte_data."column`_'with""_quotes" end as "column`_'with""_quotes", _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at @@ -36,14 +36,14 @@ select cast(id as float ) as id, - cast(currency as varchar) as currency, + cast(currency as text) as currency, cast(new_column as float ) as new_column, - cast(nullif(date, '') as + cast(nullif(date::varchar, '') as date ) as date, - cast(nullif(timestamp_col, '') as + cast(nullif(timestamp_col::varchar, '') as timestamp with time zone ) as timestamp_col, cast("hkd@spéçiäl & characters" as @@ -55,7 +55,7 @@ select cast(usd as float ) as usd, - cast("column`_'with""_quotes" as varchar) as "column`_'with""_quotes", + cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at @@ -67,7 +67,7 @@ where 1 = 1 -- SQL model to build a 
hash column based on the values of this record -- depends_on: __dbt__cte__exchange_rate_ab2 select - md5(cast(coalesce(cast(id as varchar), '') || '-' || coalesce(cast(currency as varchar), '') || '-' || coalesce(cast(new_column as varchar), '') || '-' || coalesce(cast(date as varchar), '') || '-' || coalesce(cast(timestamp_col as varchar), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as varchar), '') || '-' || coalesce(cast(nzd as varchar), '') || '-' || coalesce(cast(usd as varchar), '') || '-' || coalesce(cast("column`_'with""_quotes" as varchar), '') as varchar)) as _airbyte_exchange_rate_hashid, + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(new_column as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_exchange_rate_hashid, tmp.* from __dbt__cte__exchange_rate_ab2 tmp -- exchange_rate diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..200a7b1d3e0a6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,10 @@ + + + create view "integrationtests"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data__dbt_tmp" as ( + 
+-- depends_on: ref('dedup_exchange_rate_stg') + +select * from "integrationtests"._airbyte_test_normalization."dedup_exchange_rate_stg" + + + ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql index 797b5a85940c2..c7ee5d552f22a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -7,14 +7,14 @@ with __dbt__cte__dedup_exchange_rate_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -- depends_on: "integrationtests".test_normalization._airbyte_raw_dedup_exchange_rate select - case when json_extract_path_text(_airbyte_data, 'id', true) != '' then json_extract_path_text(_airbyte_data, 'id', true) end as id, - case when json_extract_path_text(_airbyte_data, 'currency', true) != '' then json_extract_path_text(_airbyte_data, 'currency', true) end as currency, - case when json_extract_path_text(_airbyte_data, 'new_column', true) != '' then json_extract_path_text(_airbyte_data, 'new_column', true) end as new_column, - case when json_extract_path_text(_airbyte_data, 'date', true) != '' then json_extract_path_text(_airbyte_data, 'date', true) end as date, - case when json_extract_path_text(_airbyte_data, 'timestamp_col', true) != '' then json_extract_path_text(_airbyte_data, 'timestamp_col', 
true) end as timestamp_col, - case when json_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters', true) != '' then json_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters', true) end as "hkd@spéçiäl & characters", - case when json_extract_path_text(_airbyte_data, 'NZD', true) != '' then json_extract_path_text(_airbyte_data, 'NZD', true) end as nzd, - case when json_extract_path_text(_airbyte_data, 'USD', true) != '' then json_extract_path_text(_airbyte_data, 'USD', true) end as usd, + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, + case when _airbyte_data."new_column" != '' then _airbyte_data."new_column" end as new_column, + case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, + case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, + case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", + case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, + case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at @@ -30,14 +30,14 @@ select cast(id as float ) as id, - cast(currency as varchar) as currency, + cast(currency as text) as currency, cast(new_column as float ) as new_column, - cast(nullif(date, '') as + cast(nullif(date::varchar, '') as date ) as date, - cast(nullif(timestamp_col, '') as + cast(nullif(timestamp_col::varchar, '') as timestamp with time zone ) as timestamp_col, cast("hkd@spéçiäl & characters" as @@ -59,7 +59,7 @@ where 1 = 1 )-- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__dedup_exchange_rate_ab2 select - md5(cast(coalesce(cast(id as varchar), '') || '-' || coalesce(cast(currency as varchar), '') || '-' || 
coalesce(cast(new_column as varchar), '') || '-' || coalesce(cast(date as varchar), '') || '-' || coalesce(cast(timestamp_col as varchar), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as varchar), '') || '-' || coalesce(cast(nzd as varchar), '') || '-' || coalesce(cast(usd as varchar), '') as varchar)) as _airbyte_dedup_exchange_rate_hashid, + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(new_column as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') as text)) as _airbyte_dedup_exchange_rate_hashid, tmp.* from __dbt__cte__dedup_exchange_rate_ab2 tmp -- dedup_exchange_rate diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/dbt_project.yml index 571d5bae581bb..9797dbfc27804 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/dbt_project.yml @@ -53,6 +53,7 @@ vars: NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES + 
NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES @@ -62,6 +63,7 @@ vars: SOME_STREAM_THAT_WAS_EMPTY_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY SOME_STREAM_THAT_WAS_EMPTY_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY SOME_STREAM_THAT_WAS_EMPTY_STG: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY + SOME_STREAM_THAT_WAS_EMPTY_SCD_NEW_DATA: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY SOME_STREAM_THAT_WAS_EMPTY_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY SOME_STREAM_THAT_WAS_EMPTY: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES_AB1: TEST_NORMALIZATION_NAMESPACE._AIRBYTE_RAW_SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql index b3072ce0004ca..7a1e1c737e941 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql @@ -3,7 +3,7 @@ create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" as (select * from( --- depends_on: ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') +-- depends on: "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA" with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql new file mode 100644 index 0000000000000..9eee6ae93ca9d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql @@ -0,0 +1,11 @@ + + create or replace view "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA" + + as ( + +-- depends_on: 
ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') + +select * from "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG" + + + ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB1.sql index 7a583b5f16f71..772f1976f2c6d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB1.sql @@ -16,5 +16,5 @@ select from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES') }} as table_alias -- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES where 1 = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT') }} +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB2.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB2.sql index ff84f05bcf230..fd49a8524a645 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB2.sql @@ -16,5 +16,5 @@ select from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB1') }} -- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES where 1 = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT') }} +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB1.sql index bd6a0678a36ad..e6c344e6308d2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB1.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB1.sql @@ -16,5 +16,5 @@ from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD') -- PARTITION at nested_stream_with_complex_columns_resulting_into_long_names/partition where 1 = 1 and PARTITION is not null -{{ incremental_clause('_AIRBYTE_EMITTED_AT') }} +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB1.sql index d7c93aa351da6..050da953efddd 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB1.sql @@ -17,5 +17,5 @@ from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTIT {{ cross_join_unnest('PARTITION', 'DATA') }} where 1 = 1 and DATA is not null -{{ incremental_clause('_AIRBYTE_EMITTED_AT') }} +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1.sql index d887fcbbac973..13b208068c10a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1.sql @@ -17,5 +17,5 @@ from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTIT {{ cross_join_unnest('PARTITION', 'DOUBLE_ARRAY_DATA') }} where 1 = 1 and DOUBLE_ARRAY_DATA is not null -{{ incremental_clause('_AIRBYTE_EMITTED_AT') }} +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql index e78648f624150..110c17ef216dc 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql @@ -19,5 +19,5 @@ from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD') -- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES') }} where 1 = 1 and _AIRBYTE_ACTIVE_ROW = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT') }} +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql index 29b0545db8862..3dda7efc9c613 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql @@ -16,5 
+16,5 @@ select from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB3') }} -- PARTITION at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD') }} where 1 = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT') }} +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql index 721c594cbead8..526c8b658f19c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql @@ -15,5 +15,5 @@ select from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB3') }} -- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION') }} where 1 = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT') }} +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql index 11cbbf596cf99..c46547e9a6242 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql @@ -15,5 +15,5 @@ select from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB3') }} -- DOUBLE_ARRAY_DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION') }} where 1 = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT') }} +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql index 9435ebaf2bc1d..97a332668c8f0 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql @@ -2,32 +2,71 @@ cluster_by = ["_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_UNIQUE_KEY_SCD", "_AIRBYTE_EMITTED_AT"], unique_key = "_AIRBYTE_UNIQUE_KEY_SCD", schema = "TEST_NORMALIZATION", - post_hook = ["drop view _AIRBYTE_TEST_NORMALIZATION.NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_AIRBYTE_UNIQUE_KEY' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. 
Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + delete from {{ final_table_relation }} where {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + 'ID', + ]) }} as _AIRBYTE_UNIQUE_KEY + from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA') }} + where 1=1 + {{ incremental_clause('_AIRBYTE_EMITTED_AT', this.schema + '.' 
+ adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._AIRBYTE_UNIQUE_KEY = modified_ids._AIRBYTE_UNIQUE_KEY + where _AIRBYTE_ACTIVE_ROW = 1 + ) + select modified_ids._AIRBYTE_UNIQUE_KEY from scd_active_rows + right outer join modified_ids on modified_ids._AIRBYTE_UNIQUE_KEY = scd_active_rows._AIRBYTE_UNIQUE_KEY + group by modified_ids._AIRBYTE_UNIQUE_KEY + having count(scd_active_rows._AIRBYTE_UNIQUE_KEY) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA') }}","drop view _AIRBYTE_TEST_NORMALIZATION.NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG"], tags = [ "top-level" ] ) }} --- depends_on: ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') +-- depends on: {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') }} - -- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES') }} - where 1 = 1 - {{ incremental_clause('_AIRBYTE_EMITTED_AT') }} -), new_data_ids as ( -- build a subset of _AIRBYTE_UNIQUE_KEY from rows that are new select distinct {{ dbt_utils.surrogate_key([ 'ID', ]) }} as _AIRBYTE_UNIQUE_KEY - from new_data + from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ 
ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -41,7 +80,7 @@ previous_active_scd_data as ( where _AIRBYTE_ACTIVE_ROW = 1 ), input_data as ( - select {{ dbt_utils.star(ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG')) }} from new_data + select {{ dbt_utils.star(ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG')) }} from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA') }} union all select {{ dbt_utils.star(ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql new file mode 100644 index 0000000000000..27968b241cb00 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql @@ -0,0 +1,20 @@ +{{ config( + cluster_by = ["_AIRBYTE_EMITTED_AT"], + unique_key = '_AIRBYTE_AB_ID', + schema = "_AIRBYTE_TEST_NORMALIZATION", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ 
ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') }} +-- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES') }} +where 1 = 1 +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} +{% else %} +select * from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') }} +{% endif %} +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql new file mode 100644 index 0000000000000..9eee6ae93ca9d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql @@ -0,0 +1,11 @@ + + create or replace view "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA" + + as ( + +-- depends_on: ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') + +select * from "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG" + + + ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/dbt_project.yml 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/dbt_project.yml index 76a02e4351f0e..083fcda300c3b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/dbt_project.yml @@ -57,30 +57,36 @@ vars: DEDUP_EXCHANGE_RATE_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE DEDUP_EXCHANGE_RATE_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE DEDUP_EXCHANGE_RATE_STG: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE + DEDUP_EXCHANGE_RATE_SCD_NEW_DATA: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE DEDUP_EXCHANGE_RATE_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE DEDUP_EXCHANGE_RATE: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE RENAMED_DEDUP_CDC_EXCLUDED_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED RENAMED_DEDUP_CDC_EXCLUDED_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED RENAMED_DEDUP_CDC_EXCLUDED_STG: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED + RENAMED_DEDUP_CDC_EXCLUDED_SCD_NEW_DATA: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED RENAMED_DEDUP_CDC_EXCLUDED_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED RENAMED_DEDUP_CDC_EXCLUDED: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED DEDUP_CDC_EXCLUDED_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED DEDUP_CDC_EXCLUDED_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED DEDUP_CDC_EXCLUDED_STG: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED + DEDUP_CDC_EXCLUDED_SCD_NEW_DATA: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED DEDUP_CDC_EXCLUDED_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED DEDUP_CDC_EXCLUDED: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED POS_DEDUP_CDCX_AB1: 
TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX POS_DEDUP_CDCX_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX POS_DEDUP_CDCX_STG: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX + POS_DEDUP_CDCX_SCD_NEW_DATA: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX POS_DEDUP_CDCX_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX POS_DEDUP_CDCX: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX 1_prefix_startwith_number_ab1: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER 1_prefix_startwith_number_ab2: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER 1_prefix_startwith_number_stg: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER + 1_prefix_startwith_number_scd_new_data: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER 1_prefix_startwith_number_scd: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER 1_prefix_startwith_number: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER MULTIPLE_COLUMN_NAMES_CONFLICTS_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS MULTIPLE_COLUMN_NAMES_CONFLICTS_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS MULTIPLE_COLUMN_NAMES_CONFLICTS_STG: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS + MULTIPLE_COLUMN_NAMES_CONFLICTS_SCD_NEW_DATA: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS MULTIPLE_COLUMN_NAMES_CONFLICTS_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS MULTIPLE_COLUMN_NAMES_CONFLICTS: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql index 
220cd093da41e..fbce7f410f858 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql @@ -3,7 +3,7 @@ create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD" as (select * from( --- depends_on: ref('DEDUP_EXCHANGE_RATE_STG') +-- depends on: "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD_NEW_DATA" with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql new file mode 100644 index 0000000000000..83e400e3078ad --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql @@ -0,0 +1,11 @@ + + create or replace view "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD_NEW_DATA" + + as ( + +-- depends_on: ref('DEDUP_EXCHANGE_RATE_STG') + +select * from "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_STG" + + + ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB1.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB1.sql index 64750e4ebfa83..06be4a0eaa2fb 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB1.sql @@ -21,5 +21,5 @@ select from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_DEDUP_EXCHANGE_RATE') }} as table_alias -- DEDUP_EXCHANGE_RATE where 1 = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT') }} +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB2.sql index 4a89013a88eee..f3a40af778cc4 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB2.sql @@ -29,5 +29,5 @@ select from {{ ref('DEDUP_EXCHANGE_RATE_AB1') }} -- DEDUP_EXCHANGE_RATE where 1 = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT') }} +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql index 0cf5e6b3819aa..0663a8d251e46 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql @@ -24,5 +24,5 @@ from {{ ref('DEDUP_EXCHANGE_RATE_SCD') }} -- DEDUP_EXCHANGE_RATE from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_DEDUP_EXCHANGE_RATE') }} where 1 = 1 and _AIRBYTE_ACTIVE_ROW = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT') }} +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql index 688926bdcab0f..c9e108ee29783 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql @@ -2,21 +2,62 @@ cluster_by = ["_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_UNIQUE_KEY_SCD", "_AIRBYTE_EMITTED_AT"], unique_key = "_AIRBYTE_UNIQUE_KEY_SCD", schema = "TEST_NORMALIZATION", - post_hook = ["drop view _AIRBYTE_TEST_NORMALIZATION.DEDUP_EXCHANGE_RATE_STG"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='DEDUP_EXCHANGE_RATE' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_AIRBYTE_UNIQUE_KEY' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). 
+ delete from {{ final_table_relation }} where {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY in ( + with modified_ids as ( + select + {{ dbt_utils.surrogate_key([ + 'ID', + 'CURRENCY', + 'NZD', + ]) }} as _AIRBYTE_UNIQUE_KEY + from {{ ref('DEDUP_EXCHANGE_RATE_SCD_NEW_DATA') }} + where 1=1 + {{ incremental_clause('_AIRBYTE_EMITTED_AT', this.schema + '.' + adapter.quote('DEDUP_EXCHANGE_RATE')) }} + ), + scd_active_rows as ( + select scd_table.* from {{ this }} scd_table + inner join modified_ids on scd_table._AIRBYTE_UNIQUE_KEY = modified_ids._AIRBYTE_UNIQUE_KEY + where _AIRBYTE_ACTIVE_ROW = 1 + ) + select modified_ids._AIRBYTE_UNIQUE_KEY from scd_active_rows + right outer join modified_ids on modified_ids._AIRBYTE_UNIQUE_KEY = scd_active_rows._AIRBYTE_UNIQUE_KEY + group by modified_ids._AIRBYTE_UNIQUE_KEY + having count(scd_active_rows._AIRBYTE_UNIQUE_KEY) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view {{ ref('DEDUP_EXCHANGE_RATE_SCD_NEW_DATA') }}","drop view _AIRBYTE_TEST_NORMALIZATION.DEDUP_EXCHANGE_RATE_STG"], tags = [ "top-level" ] ) }} --- depends_on: ref('DEDUP_EXCHANGE_RATE_STG') +-- depends on: {{ ref('DEDUP_EXCHANGE_RATE_SCD_NEW_DATA') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('DEDUP_EXCHANGE_RATE_STG') }} - -- DEDUP_EXCHANGE_RATE from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_DEDUP_EXCHANGE_RATE') }} - where 1 = 1 - {{ incremental_clause('_AIRBYTE_EMITTED_AT') }} -), new_data_ids as ( -- build a subset of _AIRBYTE_UNIQUE_KEY from rows that are new select distinct @@ -25,11 +66,11 @@ new_data_ids as ( 'CURRENCY', 'NZD', ]) }} as _AIRBYTE_UNIQUE_KEY - from new_data + from {{ ref('DEDUP_EXCHANGE_RATE_SCD_NEW_DATA') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ 
ref('DEDUP_EXCHANGE_RATE_SCD_NEW_DATA') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -43,7 +84,7 @@ previous_active_scd_data as ( where _AIRBYTE_ACTIVE_ROW = 1 ), input_data as ( - select {{ dbt_utils.star(ref('DEDUP_EXCHANGE_RATE_STG')) }} from new_data + select {{ dbt_utils.star(ref('DEDUP_EXCHANGE_RATE_STG')) }} from {{ ref('DEDUP_EXCHANGE_RATE_SCD_NEW_DATA') }} union all select {{ dbt_utils.star(ref('DEDUP_EXCHANGE_RATE_STG')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql new file mode 100644 index 0000000000000..a327f8001be1a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql @@ -0,0 +1,20 @@ +{{ config( + cluster_by = ["_AIRBYTE_EMITTED_AT"], + unique_key = '_AIRBYTE_AB_ID', + schema = "_AIRBYTE_TEST_NORMALIZATION", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('DEDUP_EXCHANGE_RATE_STG') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('DEDUP_EXCHANGE_RATE_STG') }} +-- DEDUP_EXCHANGE_RATE from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_DEDUP_EXCHANGE_RATE') }} +where 1 = 1 +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} +{% else %} +select * from {{ ref('DEDUP_EXCHANGE_RATE_STG') }} +{% endif %} +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} + diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql index e40d4e943eb7a..d810a79652be6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql @@ -21,5 +21,5 @@ select from {{ ref('DEDUP_EXCHANGE_RATE_AB2') }} tmp -- DEDUP_EXCHANGE_RATE where 1 = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT') }} +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql new file mode 100644 index 0000000000000..83e400e3078ad --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql @@ -0,0 +1,11 @@ + + create or replace view "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD_NEW_DATA" + + as ( + +-- depends_on: ref('DEDUP_EXCHANGE_RATE_STG') + +select * from 
"INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_STG" + + + ); From eb08d9f9f3387cc8f6888a8780da8dcd516b90b0 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 31 May 2022 20:18:33 -0700 Subject: [PATCH 22/43] mysql on m1 --- .../integration_tests/dbt_integration_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py b/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py index 844f41ece940f..1652e481281fe 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py +++ b/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py @@ -132,10 +132,12 @@ def setup_mysql_db(self): "MYSQL_INITDB_SKIP_TZINFO=yes", "-e", f"MYSQL_DATABASE={config['database']}", + "-e", + "MYSQL_ROOT_HOST=%", "-p", f"{config['port']}:3306", "-d", - "mysql", + "mysql/mysql-server", ] print("Executing: ", " ".join(commands)) subprocess.call(commands) From eb29d6fd6bdb936fd064eb1ba888c416c9417aae Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Wed, 1 Jun 2022 09:32:36 -0700 Subject: [PATCH 23/43] better clickhouse delete --- .../transform_catalog/stream_processor.py | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 89e729eb11c20..b72fd2f723dd6 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -1149,10 +1149,28 @@ def add_to_outputs( final_table_name = self.tables_registry.get_file_name(schema, self.json_path, self.stream_name, "", truncate_name) 
active_row_column_name = self.name_transformer.normalize_column_name("_airbyte_active_row") if self.destination_type == DestinationType.CLICKHOUSE: - delete_statement = "alter table {{ final_table_relation }} delete" + # Clickhouse has special delete syntax + delete_statement = ( + "alter table {{ final_table_relation }} delete where {{ final_table_relation }}." + + self.get_unique_key(in_jinja=False) + + " in" + ) noop_delete_statement = "alter table {{ this }} delete where 1=0" + elif self.destination_type == DestinationType.BIGQUERY: + # Bigquery doesn't like the "delete from project.schema.table where project.schema.table.column in" syntax; + # it requires "delete from project.schema.table table_alias where table_alias.column in" + delete_statement = ( + "delete from {{ final_table_relation }} final_table where final_table." + + self.get_unique_key(in_jinja=False) + + " in" + ) + noop_delete_statement = "delete from {{ this }} where 1=0" else: - delete_statement = "delete from {{ final_table_relation }}" + delete_statement = ( + "delete from {{ final_table_relation }} where {{ final_table_relation }}." + + self.get_unique_key(in_jinja=False) + + " in" + ) noop_delete_statement = "delete from {{ this }} where 1=0" deletion_hook = Template( """ @@ -1180,8 +1198,7 @@ def add_to_outputs( -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). 
- {{ delete_statement }} - where {{ '{{ final_table_relation }}' }}.{{ unique_key }} in ( + {{ delete_statement }} ( with modified_ids as ( select {{ '{{' }} dbt_utils.surrogate_key([ @@ -1203,7 +1220,6 @@ def add_to_outputs( group by modified_ids.{{ unique_key }} having count(scd_active_rows.{{ unique_key }}) = 0 ) - {{ '{% else %}' }} -- We have to have a non-empty query, so just do a noop delete {{ noop_delete_statement }} From a176e0fe38f6afa5e9a88eb32fe085be68d8126d Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Wed, 1 Jun 2022 20:58:53 -0700 Subject: [PATCH 24/43] fix for clickhouse (no more CTEs in where clause) --- .../transform_catalog/stream_processor.py | 59 +++++++++---------- 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index b72fd2f723dd6..44d09635c3403 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -1150,26 +1150,18 @@ def add_to_outputs( active_row_column_name = self.name_transformer.normalize_column_name("_airbyte_active_row") if self.destination_type == DestinationType.CLICKHOUSE: # Clickhouse has special delete syntax - delete_statement = ( - "alter table {{ final_table_relation }} delete where {{ final_table_relation }}." 
- + self.get_unique_key(in_jinja=False) - + " in" - ) + delete_statement = "alter table {{ final_table_relation }} delete where " + self.get_unique_key(in_jinja=False) noop_delete_statement = "alter table {{ this }} delete where 1=0" elif self.destination_type == DestinationType.BIGQUERY: # Bigquery doesn't like the "delete from project.schema.table where project.schema.table.column in" syntax; # it requires "delete from project.schema.table table_alias where table_alias.column in" - delete_statement = ( - "delete from {{ final_table_relation }} final_table where final_table." - + self.get_unique_key(in_jinja=False) - + " in" + delete_statement = "delete from {{ final_table_relation }} final_table where final_table." + self.get_unique_key( + in_jinja=False ) noop_delete_statement = "delete from {{ this }} where 1=0" else: - delete_statement = ( - "delete from {{ final_table_relation }} where {{ final_table_relation }}." - + self.get_unique_key(in_jinja=False) - + " in" + delete_statement = "delete from {{ final_table_relation }} where {{ final_table_relation }}." + self.get_unique_key( + in_jinja=False ) noop_delete_statement = "delete from {{ this }} where 1=0" deletion_hook = Template( @@ -1198,25 +1190,28 @@ def add_to_outputs( -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). 
- {{ delete_statement }} ( - with modified_ids as ( - select - {{ '{{' }} dbt_utils.surrogate_key([ - {%- for primary_key in primary_keys %} - {{ primary_key }}, - {%- endfor %} - ]) {{ '}}' }} as {{ unique_key }} - from {{ quoted_scd_new_data_table }} - where 1=1 - {{ incremental_clause }} - ), - scd_active_rows as ( - select scd_table.* from {{ '{{ this }}' }} scd_table - inner join modified_ids on scd_table.{{ unique_key }} = modified_ids.{{ unique_key }} - where {{ active_row_column_name }} = 1 - ) - select modified_ids.{{ unique_key }} from scd_active_rows - right outer join modified_ids on modified_ids.{{ unique_key }} = scd_active_rows.{{ unique_key }} + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. + {{ delete_statement }} in ( + select modified_ids.{{ unique_key }} + from + ( + select nullif(scd_table.{{ unique_key }}, '') as {{ unique_key }} from {{ '{{ this }}' }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table.{{ unique_key }} = modified_ids.{{ unique_key }} + where {{ active_row_column_name }} = 1 + ) scd_active_rows + right outer join ( + select + {{ '{{' }} dbt_utils.surrogate_key([ + {%- for primary_key in primary_keys %} + {{ primary_key }}, + {%- endfor %} + ]) {{ '}}' }} as {{ unique_key }} + from {{ quoted_scd_new_data_table }} + where 1=1 + {{ incremental_clause }} + ) modified_ids + on modified_ids.{{ unique_key }} = scd_active_rows.{{ unique_key }} group by modified_ids.{{ unique_key }} having count(scd_active_rows.{{ unique_key }}) = 0 ) From 328539cfe20da7a1dce358ff55def9d8d229bc59 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Wed, 1 Jun 2022 21:00:09 -0700 Subject: [PATCH 25/43] regenerate test outputs --- ..._columns_resulting_into_long_names_scd.sql | 35 +++--- .../dedup_exchange_rate_scd.sql | 39 ++++--- .../dedup_exchange_rate_scd.sql | 39 ++++--- .../test_simple_streams/dbt_project.yml | 107 +++++++++++------- 
.../dedup_cdc_excluded_scd.sql | 2 +- .../dedup_exchange_rate_scd.sql | 2 +- .../renamed_dedup_cdc_excluded_scd.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 12 ++ .../dedup_exchange_rate_ab1.sql | 2 +- .../dedup_exchange_rate_ab2.sql | 2 +- .../renamed_dedup_cdc_excluded_ab1.sql | 2 +- .../renamed_dedup_cdc_excluded_ab2.sql | 2 +- .../dedup_cdc_excluded_scd.sql | 70 +++++++++--- .../dedup_exchange_rate_scd.sql | 72 +++++++++--- .../dedup_exchange_rate.sql | 2 +- .../renamed_dedup_cdc_excluded.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 19 ++++ .../dedup_exchange_rate_stg.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 12 ++ ..._stream_with_co_1g_into_long_names_scd.sql | 35 +++--- .../dedup_exchange_rate_scd.sql | 39 ++++--- .../dedup_exchange_rate_scd.sql | 39 ++++--- ...ream_with_c__lting_into_long_names_scd.sql | 35 +++--- .../some_stream_that_was_empty_scd.sql | 35 +++--- .../1_prefix_startwith_number_scd.sql | 35 +++--- .../dedup_cdc_excluded_scd.sql | 35 +++--- .../dedup_exchange_rate_scd.sql | 39 ++++--- .../multiple_column_names_conflicts_scd.sql | 35 +++--- .../test_normalization/pos_dedup_cdcx_scd.sql | 35 +++--- .../renamed_dedup_cdc_excluded_scd.sql | 35 +++--- .../dedup_exchange_rate_scd.sql | 39 ++++--- .../renamed_dedup_cdc_excluded_scd.sql | 35 +++--- ..._columns_resulting_into_long_names_scd.sql | 35 +++--- .../dedup_exchange_rate_scd.sql | 39 ++++--- .../dedup_exchange_rate_scd.sql | 39 ++++--- ..._COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql | 35 +++--- .../DEDUP_EXCHANGE_RATE_SCD.sql | 39 ++++--- 37 files changed, 651 insertions(+), 432 deletions(-) create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index c7cab93202373..e186b869af04c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -27,23 +27,26 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. 
delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key - from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) - select modified_ids._airbyte_unique_key from scd_active_rows - right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key + from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key group by modified_ids._airbyte_unique_key having count(scd_active_rows._airbyte_unique_key) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 240e35e088fc9..87f60de08fac0 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -27,25 +27,28 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'NZD', - ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) - select modified_ids._airbyte_unique_key from scd_active_rows - right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'NZD', + ]) }} as _airbyte_unique_key + from {{ ref('dedup_exchange_rate_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key group by modified_ids._airbyte_unique_key having count(scd_active_rows._airbyte_unique_key) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 78fd50f1b53d4..9ad5fee82962f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -27,25 +27,28 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'NZD', - ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) - select modified_ids._airbyte_unique_key from scd_active_rows - right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'NZD', + ]) }} as _airbyte_unique_key + from {{ ref('dedup_exchange_rate_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key group by modified_ids._airbyte_unique_key having count(scd_active_rows._airbyte_unique_key) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/dbt_project.yml index 39f7bd7b02ca4..954e2c759c6e2 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/dbt_project.yml @@ -1,45 +1,29 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" +name: airbyte_utils version: "1.0" config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! 
-model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - +profile: normalize +model-paths: + - models +docs-paths: + - docs +analysis-paths: + - analysis +test-paths: + - tests +seed-paths: + - data +macro-paths: + - macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: + - build + - dbt_modules quoting: database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted schema: false identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! 
models: airbyte_utils: +materialized: table @@ -50,15 +34,58 @@ models: airbyte_incremental: +tags: incremental_tables +materialized: incremental - # schema change test isn't supported in ClickHouse yet - +on_schema_change: "ignore" + +on_schema_change: ignore airbyte_tables: +tags: normalized_tables +materialized: table airbyte_views: +tags: airbyte_internal_views +materialized: view - dispatch: - macro_namespace: dbt_utils - search_order: ["airbyte_utils", "dbt_utils"] + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate + exchange_rate: test_normalization._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd_new_data: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: 
test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded + pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_scd_new_data: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx + 1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_scd_new_data: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number + multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd_new_data: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 0c7c151236fd2..5b82ef6e38ba7 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -12,7 +12,7 @@ as ( --- depends_on: ref('dedup_cdc_excluded_stg') +-- depends on: _airbyte_test_normalization.dedup_cdc_excluded_scd_new_data with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index c1e8e6cb63fec..12dbba25a36d7 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -12,7 +12,7 @@ as ( --- depends_on: ref('dedup_exchange_rate_stg') 
+-- depends on: _airbyte_test_normalization.dedup_exchange_rate_scd_new_data with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index eedc913fd45a5..018832cbb8cde 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -12,7 +12,7 @@ as ( --- depends_on: ref('renamed_dedup_cdc_excluded_stg') +-- depends on: _airbyte_test_normalization.renamed_dedup_cdc_excluded_scd_new_data with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..8a42ce8df218b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,12 @@ + + + create view 
_airbyte_test_normalization.dedup_exchange_rate_scd_new_data__dbt_tmp + + as ( + +-- depends_on: ref('dedup_exchange_rate_stg') + +select * from _airbyte_test_normalization.dedup_exchange_rate_stg + + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql index 6e998ca141418..b0c2c4aa7fa33 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -20,5 +20,5 @@ select from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql index ee41ee94585ee..842453ba39283 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -20,5 +20,5 @@ select from {{ ref('dedup_exchange_rate_ab1') }} -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql index 88a3674f694b3..5d3e0d7f6abf0 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql @@ -14,5 +14,5 @@ select from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} as table_alias -- renamed_dedup_cdc_excluded where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql index b192f4915e98f..c6885e98962eb 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql @@ -14,5 +14,5 @@ select from {{ ref('renamed_dedup_cdc_excluded_ab1') }} -- renamed_dedup_cdc_excluded where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 99f32737436db..0177c40f52db6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -1,32 +1,74 @@ {{ config( unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["drop view _airbyte_test_normalization.dedup_cdc_excluded_stg"], + post_hook = [" + {% + set 
final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_cdc_excluded' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. + alter table {{ final_table_relation }} delete where _airbyte_unique_key in ( + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? 
+-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key + from {{ ref('dedup_cdc_excluded_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + quote('dedup_cdc_excluded')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + alter table {{ this }} delete where 1=0 + {% endif %} + ","drop view {{ ref('dedup_cdc_excluded_scd_new_data') }}","drop view _airbyte_test_normalization.dedup_cdc_excluded_stg"], tags = [ "top-level" ] ) }} --- depends_on: ref('dedup_cdc_excluded_stg') +-- depends on: {{ ref('dedup_cdc_excluded_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_cdc_excluded_stg') }} - -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ 'id', ]) }} as _airbyte_unique_key - from new_data + from {{ ref('dedup_cdc_excluded_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ ref('dedup_cdc_excluded_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -40,7 +82,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ 
dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from new_data + select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from {{ ref('dedup_cdc_excluded_scd_new_data') }} union all select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index eff375bdc37d9..604ecef56f7df 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -1,21 +1,65 @@ {{ config( unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", - post_hook = ["drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- 1. Find the records which are being updated by querying the _scd_new_data model + -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 + -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included + -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). + -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, + -- so we _must_ join against the entire SCD table to find the active row for each record. + -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. + alter table {{ final_table_relation }} delete where _airbyte_unique_key in ( + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'NZD', + ]) }} as _airbyte_unique_key + from {{ ref('dedup_exchange_rate_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ quote('dedup_exchange_rate')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + group by modified_ids._airbyte_unique_key + having count(scd_active_rows._airbyte_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + alter table {{ this }} delete where 1=0 + {% endif %} + ","drop view {{ ref('dedup_exchange_rate_scd_new_data') }}","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends_on: ref('dedup_exchange_rate_stg') +-- depends on: {{ ref('dedup_exchange_rate_scd_new_data') }} with {% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct @@ -24,11 +68,11 @@ new_data_ids as ( 'currency', 'NZD', ]) }} as _airbyte_unique_key - from new_data + from {{ ref('dedup_exchange_rate_scd_new_data') }} ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from new_data where 1 = 0 + select * from {{ ref('dedup_exchange_rate_scd_new_data') }} where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -42,7 +86,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from {{ ref('dedup_exchange_rate_scd_new_data') }} union all select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data ), diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql index 180310a437ff6..5b8ff875d3a3b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -23,5 +23,5 @@ from {{ ref('dedup_exchange_rate_scd') }} -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} where 1 = 1 and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index 1b9cead2c4958..4051dd3178c94 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -17,5 +17,5 @@ from {{ ref('renamed_dedup_cdc_excluded_scd') }} -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} where 1 = 1 and _airbyte_active_row = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..da8774679c71f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,19 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +{% if is_incremental() %} +-- retrieve incremental "new" data +select + * +from {{ ref('dedup_exchange_rate_stg') }} +-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} +{% else %} +select * from {{ ref('dedup_exchange_rate_stg') }} +{% endif %} +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql index 0b4900731039d..beb710676cb02 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -20,5 +20,5 @@ select from {{ ref('dedup_exchange_rate_ab2') }} tmp -- dedup_exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} +{{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql new file mode 100644 index 0000000000000..8a42ce8df218b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql @@ -0,0 +1,12 @@ + + + create view _airbyte_test_normalization.dedup_exchange_rate_scd_new_data__dbt_tmp + + as ( + +-- depends_on: ref('dedup_exchange_rate_stg') + +select * from 
_airbyte_test_normalization.dedup_exchange_rate_stg + + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index 122714df22215..ac8c5c00a968b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -25,23 +25,26 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key - from {{ ref('nested_stream_with_co_1ng_names_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_co__lting_into_long_names')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) - select modified_ids._airbyte_unique_key from scd_active_rows - right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key + from {{ ref('nested_stream_with_co_1ng_names_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key group by modified_ids._airbyte_unique_key having count(scd_active_rows._airbyte_unique_key) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 7de0f15fb0471..80bc481d8d172 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -25,25 +25,28 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) - select modified_ids._airbyte_unique_key from scd_active_rows - right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key + from {{ ref('dedup_exchange_rate_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key group by modified_ids._airbyte_unique_key having count(scd_active_rows._airbyte_unique_key) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index c4348e9bb1e80..806156cfa0dcb 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -25,25 +25,28 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. delete from {{ final_table_relation }} where {{ final_table_relation }}.{{ quote('_AIRBYTE_UNIQUE_KEY') }} in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', - ]) }} as {{ quote('_AIRBYTE_UNIQUE_KEY') }} - from {{ ref('dedup_exchange_rate_scd_new_data') }} - where 1=1 - {{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this.schema + '.' 
+ quote('dedup_exchange_rate')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table.{{ quote('_AIRBYTE_UNIQUE_KEY') }} = modified_ids.{{ quote('_AIRBYTE_UNIQUE_KEY') }} - where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 - ) - select modified_ids.{{ quote('_AIRBYTE_UNIQUE_KEY') }} from scd_active_rows - right outer join modified_ids on modified_ids.{{ quote('_AIRBYTE_UNIQUE_KEY') }} = scd_active_rows.{{ quote('_AIRBYTE_UNIQUE_KEY') }} + select modified_ids.{{ quote('_AIRBYTE_UNIQUE_KEY') }} + from + ( + select nullif(scd_table.{{ quote('_AIRBYTE_UNIQUE_KEY') }}, '') as {{ quote('_AIRBYTE_UNIQUE_KEY') }} from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table.{{ quote('_AIRBYTE_UNIQUE_KEY') }} = modified_ids.{{ quote('_AIRBYTE_UNIQUE_KEY') }} + where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as {{ quote('_AIRBYTE_UNIQUE_KEY') }} + from {{ ref('dedup_exchange_rate_scd_new_data') }} + where 1=1 + {{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this.schema + '.' 
+ quote('dedup_exchange_rate')) }} + ) modified_ids + on modified_ids.{{ quote('_AIRBYTE_UNIQUE_KEY') }} = scd_active_rows.{{ quote('_AIRBYTE_UNIQUE_KEY') }} group by modified_ids.{{ quote('_AIRBYTE_UNIQUE_KEY') }} having count(scd_active_rows.{{ quote('_AIRBYTE_UNIQUE_KEY') }}) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql index b854279937925..ee75e2052c9de 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql @@ -26,23 +26,26 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. 
delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from {{ ref('nested_stream_with_c__lting_into_long_names_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('nested_stream_with_c__lting_into_long_names')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) - select modified_ids._airbyte_unique_key from scd_active_rows - right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from {{ ref('nested_stream_with_c__lting_into_long_names_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_c__lting_into_long_names')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key group by modified_ids._airbyte_unique_key having count(scd_active_rows._airbyte_unique_key) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql index 6518047a27dd4..82e0d9c3b48c4 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql @@ -26,23 +26,26 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from {{ ref('some_stream_that_was_empty_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('some_stream_that_was_empty')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) - select modified_ids._airbyte_unique_key from scd_active_rows - right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from {{ ref('some_stream_that_was_empty_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('some_stream_that_was_empty')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key group by modified_ids._airbyte_unique_key having count(scd_active_rows._airbyte_unique_key) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql index c1ebf5ef25874..51b475868a529 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql @@ -26,23 +26,26 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from {{ ref('1_prefix_startwith_number_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('1_prefix_startwith_number')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) - select modified_ids._airbyte_unique_key from scd_active_rows - right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from {{ ref('1_prefix_startwith_number_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('1_prefix_startwith_number')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key group by modified_ids._airbyte_unique_key having count(scd_active_rows._airbyte_unique_key) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 61ed59ea7094f..4e1c287d8ca17 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -26,23 +26,26 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from {{ ref('dedup_cdc_excluded_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_cdc_excluded')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) - select modified_ids._airbyte_unique_key from scd_active_rows - right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from {{ ref('dedup_cdc_excluded_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key group by modified_ids._airbyte_unique_key having count(scd_active_rows._airbyte_unique_key) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 4a45ab83d50fe..cc1601f722be5 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -26,25 +26,28 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) - select modified_ids._airbyte_unique_key from scd_active_rows - right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? 
+-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key + from {{ ref('dedup_exchange_rate_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key group by modified_ids._airbyte_unique_key having count(scd_active_rows._airbyte_unique_key) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql index 611fe2f676279..cd39fa7363bc6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql @@ -26,23 +26,26 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). 
+ -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from {{ ref('multiple_column_names_conflicts_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('multiple_column_names_conflicts')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) - select modified_ids._airbyte_unique_key from scd_active_rows - right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from {{ ref('multiple_column_names_conflicts_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('multiple_column_names_conflicts')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key group by modified_ids._airbyte_unique_key having count(scd_active_rows._airbyte_unique_key) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql index 302deb09d0ab5..32f8470144da1 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -26,23 +26,26 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from {{ ref('pos_dedup_cdcx_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('pos_dedup_cdcx')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) - select modified_ids._airbyte_unique_key from scd_active_rows - right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from {{ ref('pos_dedup_cdcx_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('pos_dedup_cdcx')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key group by modified_ids._airbyte_unique_key having count(scd_active_rows._airbyte_unique_key) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index c4c433cc3bd8b..a1942adb4aabf 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -26,23 +26,26 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) - select modified_ids._airbyte_unique_key from scd_active_rows - right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? 
+-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key group by modified_ids._airbyte_unique_key having count(scd_active_rows._airbyte_unique_key) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index eca96683c58b2..90081f1578d64 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -26,25 +26,28 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). 
+ -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) - select modified_ids._airbyte_unique_key from scd_active_rows - right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key + from {{ ref('dedup_exchange_rate_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key group by modified_ids._airbyte_unique_key having count(scd_active_rows._airbyte_unique_key) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 3a78255a1d42f..e48a79fce15fc 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -26,23 +26,26 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('renamed_dedup_cdc_excluded')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) - select modified_ids._airbyte_unique_key from scd_active_rows - right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('renamed_dedup_cdc_excluded')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key group by modified_ids._airbyte_unique_key having count(scd_active_rows._airbyte_unique_key) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index bd7b368d44c9e..533f2cf15a0d8 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -26,23 +26,26 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. 
delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key - from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) - select modified_ids._airbyte_unique_key from scd_active_rows - right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key + from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key group by modified_ids._airbyte_unique_key having count(scd_active_rows._airbyte_unique_key) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 8da0bdee4f901..d08d58bdbfd7d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -26,25 +26,28 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) - select modified_ids._airbyte_unique_key from scd_active_rows - right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key + from {{ ref('dedup_exchange_rate_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key group by modified_ids._airbyte_unique_key having count(scd_active_rows._airbyte_unique_key) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 1b51c0ba1d938..0064da124325f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -26,25 +26,28 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) - select modified_ids._airbyte_unique_key from scd_active_rows - right outer join modified_ids on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key + select modified_ids._airbyte_unique_key + from + ( + select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key + where _airbyte_active_row = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key + from {{ ref('dedup_exchange_rate_scd_new_data') }} + where 1=1 + {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + ) modified_ids + on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key group by modified_ids._airbyte_unique_key having count(scd_active_rows._airbyte_unique_key) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql index 97a332668c8f0..1b7251a40785d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql @@ -26,23 +26,26 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. 
delete from {{ final_table_relation }} where {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - 'ID', - ]) }} as _AIRBYTE_UNIQUE_KEY - from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA') }} - where 1=1 - {{ incremental_clause('_AIRBYTE_EMITTED_AT', this.schema + '.' + adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._AIRBYTE_UNIQUE_KEY = modified_ids._AIRBYTE_UNIQUE_KEY - where _AIRBYTE_ACTIVE_ROW = 1 - ) - select modified_ids._AIRBYTE_UNIQUE_KEY from scd_active_rows - right outer join modified_ids on modified_ids._AIRBYTE_UNIQUE_KEY = scd_active_rows._AIRBYTE_UNIQUE_KEY + select modified_ids._AIRBYTE_UNIQUE_KEY + from + ( + select nullif(scd_table._AIRBYTE_UNIQUE_KEY, '') as _AIRBYTE_UNIQUE_KEY from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table._AIRBYTE_UNIQUE_KEY = modified_ids._AIRBYTE_UNIQUE_KEY + where _AIRBYTE_ACTIVE_ROW = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + 'ID', + ]) }} as _AIRBYTE_UNIQUE_KEY + from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA') }} + where 1=1 + {{ incremental_clause('_AIRBYTE_EMITTED_AT', this.schema + '.' 
+ adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} + ) modified_ids + on modified_ids._AIRBYTE_UNIQUE_KEY = scd_active_rows._AIRBYTE_UNIQUE_KEY group by modified_ids._AIRBYTE_UNIQUE_KEY having count(scd_active_rows._AIRBYTE_UNIQUE_KEY) = 0 ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql index c9e108ee29783..536f07528ce1b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql @@ -26,25 +26,28 @@ -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, -- so we _must_ join against the entire SCD table to find the active row for each record. -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). + -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. delete from {{ final_table_relation }} where {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY in ( - with modified_ids as ( - select - {{ dbt_utils.surrogate_key([ - 'ID', - 'CURRENCY', - 'NZD', - ]) }} as _AIRBYTE_UNIQUE_KEY - from {{ ref('DEDUP_EXCHANGE_RATE_SCD_NEW_DATA') }} - where 1=1 - {{ incremental_clause('_AIRBYTE_EMITTED_AT', this.schema + '.' 
+ adapter.quote('DEDUP_EXCHANGE_RATE')) }} - ), - scd_active_rows as ( - select scd_table.* from {{ this }} scd_table - inner join modified_ids on scd_table._AIRBYTE_UNIQUE_KEY = modified_ids._AIRBYTE_UNIQUE_KEY - where _AIRBYTE_ACTIVE_ROW = 1 - ) - select modified_ids._AIRBYTE_UNIQUE_KEY from scd_active_rows - right outer join modified_ids on modified_ids._AIRBYTE_UNIQUE_KEY = scd_active_rows._AIRBYTE_UNIQUE_KEY + select modified_ids._AIRBYTE_UNIQUE_KEY + from + ( + select nullif(scd_table._AIRBYTE_UNIQUE_KEY, '') as _AIRBYTE_UNIQUE_KEY from {{ this }} scd_table +-- TODO is this even necessary? +-- inner join modified_ids on scd_table._AIRBYTE_UNIQUE_KEY = modified_ids._AIRBYTE_UNIQUE_KEY + where _AIRBYTE_ACTIVE_ROW = 1 + ) scd_active_rows + right outer join ( + select + {{ dbt_utils.surrogate_key([ + 'ID', + 'CURRENCY', + 'NZD', + ]) }} as _AIRBYTE_UNIQUE_KEY + from {{ ref('DEDUP_EXCHANGE_RATE_SCD_NEW_DATA') }} + where 1=1 + {{ incremental_clause('_AIRBYTE_EMITTED_AT', this.schema + '.' 
+ adapter.quote('DEDUP_EXCHANGE_RATE')) }} + ) modified_ids + on modified_ids._AIRBYTE_UNIQUE_KEY = scd_active_rows._AIRBYTE_UNIQUE_KEY group by modified_ids._AIRBYTE_UNIQUE_KEY having count(scd_active_rows._AIRBYTE_UNIQUE_KEY) = 0 ) From 965777afc6c187efbe394f4c491da35b9150ef51 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Thu, 2 Jun 2022 18:54:18 -0700 Subject: [PATCH 26/43] make mssql test run on m1 mac --- .../integration_tests/dbt_integration_test.py | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py b/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py index 1652e481281fe..c77e411762375 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py +++ b/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py @@ -177,31 +177,46 @@ def setup_mssql_db(self): "-e", "ACCEPT_EULA='Y'", "-e", - f"SA_PASSWORD='{config['password']}'", + f"MSSQL_USER='{config['username']}'", "-e", - "MSSQL_PID='Standard'", + f"MSSQL_SA_PASSWORD='{config['password']}'", + "-e", + "MSSQL_PID='Developer'", "-p", f"{config['port']}:1433", "-d", - "mcr.microsoft.com/mssql/server:2019-GA-ubuntu-16.04", + "mcr.microsoft.com/azure-sql-edge", ] # cmds & parameters cmd_start_container = " ".join(command_start_container) wait_sec = 30 # run the docker container print("Executing: ", cmd_start_container) - subprocess.check_call(cmd_start_container, shell=True) + # For some reason, this command doesn't work if we call it with command_start_container and remove the shell=True param + container_id = subprocess.run(cmd_start_container, shell=True, capture_output=True, text=True, check=True).stdout.strip() # wait for service is available print(f"....Waiting for MS SQL Server to start...{wait_sec} sec") time.sleep(wait_sec) - # Run additional commands to prepare the table + + 
# The arm64 version of azure-sql-edge doesn't include sqlcmd, so we have to run a separate mssql-tools container + # So first we need to find the IP of the SQL Server container + command_find_container_ip = [ + "docker", + "inspect", + "-f", + "{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}", + container_id, + ] + container_ip = subprocess.run(command_find_container_ip, capture_output=True, text=True).stdout.strip() + + # Run additional commands to prepare the table by starting a new mssql-tools container and connecting to that IP command_create_db = [ "docker", - "exec", - f"{self.container_prefix}_mssql", + "run", + "mcr.microsoft.com/mssql-tools", "/opt/mssql-tools/bin/sqlcmd", "-S", - config["host"], + container_ip, "-U", config["username"], "-P", From d828dfb8a978206b6e42e1fbcc03ec54f798b109 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 3 Jun 2022 08:56:58 -0700 Subject: [PATCH 27/43] Revert "make mssql test run on m1 mac" This reverts commit 965777afc6c187efbe394f4c491da35b9150ef51. 
--- .../integration_tests/dbt_integration_test.py | 31 +++++-------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py b/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py index c77e411762375..1652e481281fe 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py +++ b/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py @@ -177,46 +177,31 @@ def setup_mssql_db(self): "-e", "ACCEPT_EULA='Y'", "-e", - f"MSSQL_USER='{config['username']}'", + f"SA_PASSWORD='{config['password']}'", "-e", - f"MSSQL_SA_PASSWORD='{config['password']}'", - "-e", - "MSSQL_PID='Developer'", + "MSSQL_PID='Standard'", "-p", f"{config['port']}:1433", "-d", - "mcr.microsoft.com/azure-sql-edge", + "mcr.microsoft.com/mssql/server:2019-GA-ubuntu-16.04", ] # cmds & parameters cmd_start_container = " ".join(command_start_container) wait_sec = 30 # run the docker container print("Executing: ", cmd_start_container) - # For some reason, this command doesn't work if we call it with command_start_container and remove the shell=True param - container_id = subprocess.run(cmd_start_container, shell=True, capture_output=True, text=True, check=True).stdout.strip() + subprocess.check_call(cmd_start_container, shell=True) # wait for service is available print(f"....Waiting for MS SQL Server to start...{wait_sec} sec") time.sleep(wait_sec) - - # The arm64 version of azure-sql-edge doesn't include sqlcmd, so we have to run a separate mssql-tools container - # So first we need to find the IP of the SQL Server container - command_find_container_ip = [ - "docker", - "inspect", - "-f", - "{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}", - container_id, - ] - container_ip = subprocess.run(command_find_container_ip, capture_output=True, text=True).stdout.strip() - - # Run additional commands 
to prepare the table by starting a new mssql-tools container and connecting to that IP + # Run additional commands to prepare the table command_create_db = [ "docker", - "run", - "mcr.microsoft.com/mssql-tools", + "exec", + f"{self.container_prefix}_mssql", "/opt/mssql-tools/bin/sqlcmd", "-S", - container_ip, + config["host"], "-U", config["username"], "-P", From 600b084bf65737d9befa16250d156be80941cc41 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 3 Jun 2022 08:57:47 -0700 Subject: [PATCH 28/43] fix drop view for mssql --- .../transform_catalog/stream_processor.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 44d09635c3403..647e0d2f2f26d 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -1248,10 +1248,16 @@ def add_to_outputs( f"delete from {stg_schema}.{stg_table} where {self.airbyte_emitted_at} != (select max({self.airbyte_emitted_at}) from {stg_schema}.{stg_table})", ) else: - # Note the different macro styles: - # scd_new_data_table is a DBT ref() macro, so we wrap it in another {{ ... 
}} so that DBT will resolve it - # stg_schema+stg_table are plain strings, so they need to be rendered as plain strings - hooks.append(f"drop view {{{{ {scd_new_data_table} }}}}") + # Note that we can't directly use scd_new_data_table (which is a dbt ref() macro) + # because MSSQL returns an error ('DROP VIEW' does not allow specifying the database name as a prefix to the object name) + + scd_new_data_table_raw_name = self.tables_registry.get_file_name( + schema, self.json_path, self.stream_name, "scd_new_data", truncate_name + ) + if self.name_transformer.needs_quotes(scd_new_data_table_raw_name): + scd_new_data_table_raw_name = jinja_call(self.name_transformer.apply_quote(scd_new_data_table_raw_name)) + + hooks.append(f"drop view {stg_schema}.{scd_new_data_table_raw_name}") hooks.append(f"drop view {stg_schema}.{stg_table}") config["post_hook"] = "[" + ",".join(map(lambda hook: '"' + hook + '"', hooks)) + "]" else: From 1a295e87823d14583c7432e49fd3952ca2ee3252 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 3 Jun 2022 08:58:01 -0700 Subject: [PATCH 29/43] fix for linter --- .../normalization/transform_catalog/stream_processor.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 647e0d2f2f26d..398e49f19d62e 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -1259,7 +1259,12 @@ def add_to_outputs( hooks.append(f"drop view {stg_schema}.{scd_new_data_table_raw_name}") hooks.append(f"drop view {stg_schema}.{stg_table}") - config["post_hook"] = "[" + ",".join(map(lambda hook: '"' + hook + '"', hooks)) + "]" + + # Explicit function so that we can have type hints to satisfy the 
linter + def wrap_in_quotes(s: str) -> str: + return '"' + s + '"' + + config["post_hook"] = "[" + ",".join(map(wrap_in_quotes, hooks)) + "]" else: # incremental is handled in the SCD SQL already sql = self.add_incremental_clause(sql) From d6f7ca6b389456a6e884ea5270fa9c1ade02e39c Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Thu, 9 Jun 2022 13:56:25 -0700 Subject: [PATCH 30/43] much simpler query --- .../transform_catalog/stream_processor.py | 66 ++++++++----------- 1 file changed, 28 insertions(+), 38 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 398e49f19d62e..8b38dc3b1fc00 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -319,6 +319,7 @@ def process(self) -> List["StreamProcessor"]: unique_key=self.get_unique_key(), partition_by=PartitionScheme.UNIQUE_KEY, column_names=column_names, + scd_table=from_table, ) return self.find_children_streams(from_table, column_names) @@ -1103,6 +1104,10 @@ def add_incremental_clause(self, sql_query: str) -> Any: def get_incremental_clause(self, tablename: str) -> Any: return "{{ incremental_clause(" + self.get_emitted_at(in_jinja=True) + ", " + tablename + ") }}" + # TODO add optional param to get_incremental_clause + def get_incremental_clause_normalized_at(self, tablename: str) -> Any: + return "{{ incremental_clause(" + self.get_normalized_at(in_jinja=True) + ", " + tablename + ") }}" + @staticmethod def list_fields(column_names: Dict[str, Tuple[str, str]]) -> List[str]: return [column_names[field][0] for field in column_names] @@ -1119,6 +1124,7 @@ def add_to_outputs( do_deletions: bool = False, column_names: Dict[str, Tuple[str, str]] = {}, scd_new_data_table: str = "", + 
scd_table: str = "", ) -> str: schema = self.get_schema(is_intermediate) # MySQL table names need to be manually truncated, because it does not do it automatically @@ -1150,19 +1156,18 @@ def add_to_outputs( active_row_column_name = self.name_transformer.normalize_column_name("_airbyte_active_row") if self.destination_type == DestinationType.CLICKHOUSE: # Clickhouse has special delete syntax - delete_statement = "alter table {{ final_table_relation }} delete where " + self.get_unique_key(in_jinja=False) + delete_statement = "alter table {{ final_table_relation }} delete" + unique_key_reference = self.get_unique_key(in_jinja=False) noop_delete_statement = "alter table {{ this }} delete where 1=0" elif self.destination_type == DestinationType.BIGQUERY: # Bigquery doesn't like the "delete from project.schema.table where project.schema.table.column in" syntax; # it requires "delete from project.schema.table table_alias where table_alias.column in" - delete_statement = "delete from {{ final_table_relation }} final_table where final_table." + self.get_unique_key( - in_jinja=False - ) + delete_statement = "delete from {{ final_table_relation }} final_table" + unique_key_reference = "final_table." + self.get_unique_key(in_jinja=False) noop_delete_statement = "delete from {{ this }} where 1=0" else: - delete_statement = "delete from {{ final_table_relation }} where {{ final_table_relation }}." + self.get_unique_key( - in_jinja=False - ) + delete_statement = "delete from {{ final_table_relation }}" + unique_key_reference = "{{ final_table_relation }}." + self.get_unique_key(in_jinja=False) noop_delete_statement = "delete from {{ this }} where 1=0" deletion_hook = Template( """ @@ -1183,37 +1188,17 @@ def add_to_outputs( {{ '%}' }} -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. 
Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - {{ delete_statement }} in ( - select modified_ids.{{ unique_key }} - from - ( - select nullif(scd_table.{{ unique_key }}, '') as {{ unique_key }} from {{ '{{ this }}' }} scd_table --- TODO is this even necessary? --- inner join modified_ids on scd_table.{{ unique_key }} = modified_ids.{{ unique_key }} - where {{ active_row_column_name }} = 1 - ) scd_active_rows - right outer join ( - select - {{ '{{' }} dbt_utils.surrogate_key([ - {%- for primary_key in primary_keys %} - {{ primary_key }}, - {%- endfor %} - ]) {{ '}}' }} as {{ unique_key }} - from {{ quoted_scd_new_data_table }} - where 1=1 - {{ incremental_clause }} - ) modified_ids - on modified_ids.{{ unique_key }} = scd_active_rows.{{ unique_key }} - group by modified_ids.{{ unique_key }} - having count(scd_active_rows.{{ unique_key }}) = 0 + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + {{ delete_statement }} where {{ unique_key_reference }} in ( + select {{ unique_key }} + from {{ '{{ this }}' }} + where 1 = 1 {{ normalized_at_incremental_clause }} 
+ ) and {{ unique_key_reference }} not in ( + select {{ unique_key }} + from {{ '{{ this }}' }} + where _airbyte_active_row = 1 {{ normalized_at_incremental_clause }} ) {{ '{% else %}' }} -- We have to have a non-empty query, so just do a noop delete @@ -1236,6 +1221,11 @@ def add_to_outputs( scd_new_data_table=scd_new_data_table, quoted_scd_new_data_table=jinja_call(scd_new_data_table), active_row_column_name=active_row_column_name, + scd_table=scd_table, + normalized_at_incremental_clause=self.get_incremental_clause_normalized_at( + "this.schema + '.' + " + self.name_transformer.apply_quote(final_table_name) + ), + unique_key_reference=unique_key_reference, ) hooks.append(deletion_hook) From b542b4c7d5b8cd70ffba3dd6f55461a1f8a8aba6 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Thu, 9 Jun 2022 15:44:46 -0700 Subject: [PATCH 31/43] cleanup --- .../transform_catalog/stream_processor.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 8b38dc3b1fc00..d3e2f13162dfe 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -1102,11 +1102,10 @@ def add_incremental_clause(self, sql_query: str) -> Any: return sql def get_incremental_clause(self, tablename: str) -> Any: - return "{{ incremental_clause(" + self.get_emitted_at(in_jinja=True) + ", " + tablename + ") }}" + return self.get_incremental_clause_for_column(tablename, self.get_emitted_at(in_jinja=True)) - # TODO add optional param to get_incremental_clause - def get_incremental_clause_normalized_at(self, tablename: str) -> Any: - return "{{ incremental_clause(" + self.get_normalized_at(in_jinja=True) + ", " + tablename + ") }}" 
+ def get_incremental_clause_for_column(self, tablename: str, column: str) -> Any: + return "{{ incremental_clause(" + column + ", " + tablename + ") }}" @staticmethod def list_fields(column_names: Dict[str, Tuple[str, str]]) -> List[str]: @@ -1222,8 +1221,9 @@ def add_to_outputs( quoted_scd_new_data_table=jinja_call(scd_new_data_table), active_row_column_name=active_row_column_name, scd_table=scd_table, - normalized_at_incremental_clause=self.get_incremental_clause_normalized_at( - "this.schema + '.' + " + self.name_transformer.apply_quote(final_table_name) + normalized_at_incremental_clause=self.get_incremental_clause_for_column( + "this.schema + '.' + " + self.name_transformer.apply_quote(final_table_name), + self.get_normalized_at(in_jinja=True), ), unique_key_reference=unique_key_reference, ) From d20550558a418fe6d30a0e8e421cc5547ec6c609 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Thu, 9 Jun 2022 15:57:37 -0700 Subject: [PATCH 32/43] cleanup --- .../transform_catalog/stream_processor.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index d3e2f13162dfe..291f7a86d4d6e 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -1197,7 +1197,7 @@ def add_to_outputs( ) and {{ unique_key_reference }} not in ( select {{ unique_key }} from {{ '{{ this }}' }} - where _airbyte_active_row = 1 {{ normalized_at_incremental_clause }} + where {{ active_row_column_name }} = 1 {{ normalized_at_incremental_clause }} ) {{ '{% else %}' }} -- We have to have a non-empty query, so just do a noop delete @@ -1208,19 +1208,9 @@ def add_to_outputs( delete_statement=delete_statement, 
noop_delete_statement=noop_delete_statement, final_table_name=final_table_name, - quoted_final_table_name=jinja_call(self.name_transformer.apply_quote(final_table_name)), unique_key=self.get_unique_key(in_jinja=False), quoted_unique_key=self.get_unique_key(in_jinja=True), - primary_keys=self.list_primary_keys(column_names), - stg_schema=stg_schema, - stg_table=stg_table, - incremental_clause=self.get_incremental_clause( - "this.schema + '.' + " + self.name_transformer.apply_quote(final_table_name) - ), - scd_new_data_table=scd_new_data_table, - quoted_scd_new_data_table=jinja_call(scd_new_data_table), active_row_column_name=active_row_column_name, - scd_table=scd_table, normalized_at_incremental_clause=self.get_incremental_clause_for_column( "this.schema + '.' + " + self.name_transformer.apply_quote(final_table_name), self.get_normalized_at(in_jinja=True), From 09814a327793831b438561e7f7938439d22028f6 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Thu, 9 Jun 2022 16:23:48 -0700 Subject: [PATCH 33/43] remove new_data model --- .../transform_catalog/stream_processor.py | 74 ++++--------------- 1 file changed, 16 insertions(+), 58 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 291f7a86d4d6e..f61f365b77e1d 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -291,15 +291,8 @@ def process(self) -> List["StreamProcessor"]: suffix="stg", ) - scd_new_data_table = self.add_to_outputs( - self.generate_scd_new_data_model(from_table), - materialization_mode=forced_materialization_type, - is_intermediate=True, - suffix="scd_new_data", - ) - from_table = self.add_to_outputs( - self.generate_scd_type_2_model(from_table, 
scd_new_data_table, column_names), + self.generate_scd_type_2_model(from_table, column_names), self.get_model_materialization_mode(is_intermediate=False, column_count=column_count), is_intermediate=False, suffix="scd", @@ -307,8 +300,6 @@ def process(self) -> List["StreamProcessor"]: unique_key=self.name_transformer.normalize_column_name(f"{self.airbyte_unique_key}_scd"), partition_by=PartitionScheme.ACTIVE_ROW, do_deletions=True, - column_names=column_names, - scd_new_data_table=scd_new_data_table, ) where_clause = f"\nand {self.name_transformer.normalize_column_name('_airbyte_active_row')} = 1" # from_table should not use the de-duplicated final table or tables downstream (nested streams) will miss non active rows @@ -318,8 +309,6 @@ def process(self) -> List["StreamProcessor"]: is_intermediate=False, unique_key=self.get_unique_key(), partition_by=PartitionScheme.UNIQUE_KEY, - column_names=column_names, - scd_table=from_table, ) return self.find_children_streams(from_table, column_names) @@ -708,31 +697,7 @@ def safe_cast_to_string(definition: Dict, column_name: str, destination_type: De return col - def generate_scd_new_data_model(self, from_table: str) -> Any: - jinja_variables = { - "from_table": from_table, - "quoted_col_emitted_at": self.get_emitted_at(in_jinja=True), - "sql_table_comment": self.sql_table_comment(include_from_table=True), - } - sql = Template( - """ --- depends_on: {{ from_table }} -{{ '{% if is_incremental() %}' }} --- retrieve incremental "new" data -select - * -from {{'{{'}} {{ from_table }} {{'}}'}} -{{ sql_table_comment }} -where 1 = 1 -{{'{{'}} incremental_clause({{ quoted_col_emitted_at }}, this) {{'}}'}} -{{ '{% else %}' }} -select * from {{'{{'}} {{ from_table }} {{'}}'}} -{{ '{% endif %}' }} -""" - ).render(jinja_variables) - return sql - - def generate_scd_type_2_model(self, from_table: str, new_data_table: str, column_names: Dict[str, Tuple[str, str]]) -> Any: + def generate_scd_type_2_model(self, from_table: str, column_names: 
Dict[str, Tuple[str, str]]) -> Any: cursor_field = self.get_cursor_field(column_names) order_null = f"is null asc,\n {cursor_field} desc" if self.destination_type.value == DestinationType.ORACLE.value: @@ -824,10 +789,10 @@ def generate_scd_type_2_model(self, from_table: str, new_data_table: str, column "fields": self.list_fields(column_names), "from_table": from_table, "hash_id": self.hash_id(), + "incremental_clause": self.get_incremental_clause("this"), "input_data_table": input_data_table, "lag_begin": lag_begin, "lag_end": lag_end, - "new_data_table": new_data_table, "order_null": order_null, "parent_hash_id": self.parent_hash_id(), "primary_key_partition": self.get_primary_key_partition(column_names), @@ -885,9 +850,18 @@ def generate_scd_type_2_model(self, from_table: str, new_data_table: str, column jinja_variables["scd_columns_sql"] = scd_columns_sql sql = Template( """ --- depends on: {{ '{{' }} {{ new_data_table }} {{ '}}' }} +-- depends on: {{ from_table }} with {{ '{% if is_incremental() %}' }} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{'{{'}} {{ from_table }} {{'}}'}} + {{ sql_table_comment }} + where 1 = 1 + {{ incremental_clause }} +), new_data_ids as ( -- build a subset of {{ unique_key }} from rows that are new select distinct @@ -896,11 +870,11 @@ def generate_scd_type_2_model(self, from_table: str, new_data_table: str, column {{ primary_key }}, {%- endfor %} ]) {{ '}}' }} as {{ unique_key }} - from {{ '{{' }} {{ new_data_table }} {{ '}}' }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ '{{' }} {{ new_data_table }} {{ '}}' }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -914,7 +888,7 @@ def generate_scd_type_2_model(self, from_table: str, new_data_table: str, column where {{ active_row }} = 1 ), input_data 
as ( - select {{ '{{' }} dbt_utils.star({{ from_table }}) {{ '}}' }} from {{ '{{' }} {{ new_data_table }} {{ '}}' }} + select {{ '{{' }} dbt_utils.star({{ from_table }}) {{ '}}' }} from new_data union all select {{ '{{' }} dbt_utils.star({{ from_table }}) {{ '}}' }} from previous_active_scd_data ), @@ -1121,9 +1095,6 @@ def add_to_outputs( subdir: str = "", partition_by: PartitionScheme = PartitionScheme.DEFAULT, do_deletions: bool = False, - column_names: Dict[str, Tuple[str, str]] = {}, - scd_new_data_table: str = "", - scd_table: str = "", ) -> str: schema = self.get_schema(is_intermediate) # MySQL table names need to be manually truncated, because it does not do it automatically @@ -1221,23 +1192,10 @@ def add_to_outputs( if self.destination_type.value == DestinationType.POSTGRES.value: # Keep only rows with the max emitted_at to keep incremental behavior - hooks.append( - f"delete from {{{{ {scd_new_data_table} }}}} where {self.airbyte_emitted_at} != (select max({self.airbyte_emitted_at}) from {{{{ {scd_new_data_table} }}}})", - ) hooks.append( f"delete from {stg_schema}.{stg_table} where {self.airbyte_emitted_at} != (select max({self.airbyte_emitted_at}) from {stg_schema}.{stg_table})", ) else: - # Note that we can't directly use scd_new_data_table (which is a dbt ref() macro) - # because MSSQL returns an error ('DROP VIEW' does not allow specifying the database name as a prefix to the object name) - - scd_new_data_table_raw_name = self.tables_registry.get_file_name( - schema, self.json_path, self.stream_name, "scd_new_data", truncate_name - ) - if self.name_transformer.needs_quotes(scd_new_data_table_raw_name): - scd_new_data_table_raw_name = jinja_call(self.name_transformer.apply_quote(scd_new_data_table_raw_name)) - - hooks.append(f"drop view {stg_schema}.{scd_new_data_table_raw_name}") hooks.append(f"drop view {stg_schema}.{stg_table}") # Explicit function so that we can have type hints to satisfy the linter From e36b305ccb96464e44a4a429fbea786ccb5c7ac7 
Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 10 Jun 2022 09:11:04 -0700 Subject: [PATCH 34/43] remove do_deletions flag --- .../transform_catalog/stream_processor.py | 134 +++++++++--------- 1 file changed, 65 insertions(+), 69 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index f61f365b77e1d..ed88c0e2194b0 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -299,7 +299,6 @@ def process(self) -> List["StreamProcessor"]: subdir="scd", unique_key=self.name_transformer.normalize_column_name(f"{self.airbyte_unique_key}_scd"), partition_by=PartitionScheme.ACTIVE_ROW, - do_deletions=True, ) where_clause = f"\nand {self.name_transformer.normalize_column_name('_airbyte_active_row')} = 1" # from_table should not use the de-duplicated final table or tables downstream (nested streams) will miss non active rows @@ -1094,7 +1093,6 @@ def add_to_outputs( unique_key: str = "", subdir: str = "", partition_by: PartitionScheme = PartitionScheme.DEFAULT, - do_deletions: bool = False, ) -> str: schema = self.get_schema(is_intermediate) # MySQL table names need to be manually truncated, because it does not do it automatically @@ -1120,75 +1118,73 @@ def add_to_outputs( if suffix == "scd": hooks = [] - # This delete query depends on the _stg model, so run it before we drop/update the _stg view - if do_deletions: - final_table_name = self.tables_registry.get_file_name(schema, self.json_path, self.stream_name, "", truncate_name) - active_row_column_name = self.name_transformer.normalize_column_name("_airbyte_active_row") - if self.destination_type == DestinationType.CLICKHOUSE: - # Clickhouse has special delete syntax - delete_statement = 
"alter table {{ final_table_relation }} delete" - unique_key_reference = self.get_unique_key(in_jinja=False) - noop_delete_statement = "alter table {{ this }} delete where 1=0" - elif self.destination_type == DestinationType.BIGQUERY: - # Bigquery doesn't like the "delete from project.schema.table where project.schema.table.column in" syntax; - # it requires "delete from project.schema.table table_alias where table_alias.column in" - delete_statement = "delete from {{ final_table_relation }} final_table" - unique_key_reference = "final_table." + self.get_unique_key(in_jinja=False) - noop_delete_statement = "delete from {{ this }} where 1=0" - else: - delete_statement = "delete from {{ final_table_relation }}" - unique_key_reference = "{{ final_table_relation }}." + self.get_unique_key(in_jinja=False) - noop_delete_statement = "delete from {{ this }} where 1=0" - deletion_hook = Template( - """ - {{ '{%' }} - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='{{ final_table_name }}' - ) - {{ '%}' }} - {{ '{#' }} - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) - {{ '#}' }} - {{ '{%' }} - if final_table_relation is not none and {{ quoted_unique_key }} in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - {{ '%}' }} - - -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active - {{ delete_statement }} where {{ unique_key_reference }} in ( - select {{ unique_key }} - from {{ '{{ this }}' }} - where 1 = 1 {{ normalized_at_incremental_clause }} - ) and {{ unique_key_reference }} not in ( - select {{ unique_key }} - from {{ '{{ this }}' }} - where {{ active_row_column_name }} = 1 {{ normalized_at_incremental_clause }} + final_table_name = self.tables_registry.get_file_name(schema, self.json_path, self.stream_name, "", truncate_name) + active_row_column_name = self.name_transformer.normalize_column_name("_airbyte_active_row") + if self.destination_type == DestinationType.CLICKHOUSE: + # Clickhouse has special delete syntax + delete_statement = "alter table {{ final_table_relation }} delete" + unique_key_reference = self.get_unique_key(in_jinja=False) + noop_delete_statement = "alter table {{ this }} delete where 1=0" + elif self.destination_type == DestinationType.BIGQUERY: + # Bigquery doesn't like the "delete from project.schema.table where project.schema.table.column in" syntax; + # it requires "delete from project.schema.table table_alias where table_alias.column in" + delete_statement = "delete from {{ final_table_relation }} final_table" + unique_key_reference = "final_table." + self.get_unique_key(in_jinja=False) + noop_delete_statement = "delete from {{ this }} where 1=0" + else: + delete_statement = "delete from {{ final_table_relation }}" + unique_key_reference = "{{ final_table_relation }}." 
+ self.get_unique_key(in_jinja=False) + noop_delete_statement = "delete from {{ this }} where 1=0" + deletion_hook = Template( + """ + {{ '{%' }} + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='{{ final_table_name }}' ) - {{ '{% else %}' }} - -- We have to have a non-empty query, so just do a noop delete - {{ noop_delete_statement }} - {{ '{% endif %}' }} - """ - ).render( - delete_statement=delete_statement, - noop_delete_statement=noop_delete_statement, - final_table_name=final_table_name, - unique_key=self.get_unique_key(in_jinja=False), - quoted_unique_key=self.get_unique_key(in_jinja=True), - active_row_column_name=active_row_column_name, - normalized_at_incremental_clause=self.get_incremental_clause_for_column( - "this.schema + '.' + " + self.name_transformer.apply_quote(final_table_name), - self.get_normalized_at(in_jinja=True), - ), - unique_key_reference=unique_key_reference, + {{ '%}' }} + {{ '{#' }} + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) + {{ '#}' }} + {{ '{%' }} + if final_table_relation is not none and {{ quoted_unique_key }} in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + {{ '%}' }} + + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + {{ delete_statement }} where {{ unique_key_reference }} in ( + select {{ unique_key }} + from {{ '{{ this }}' }} + where 1 = 1 {{ normalized_at_incremental_clause }} + ) and {{ unique_key_reference }} not in ( + select {{ unique_key }} + from {{ '{{ this }}' }} + where {{ active_row_column_name }} = 1 {{ normalized_at_incremental_clause }} ) - hooks.append(deletion_hook) + {{ '{% else %}' }} + -- We have to have a non-empty query, so just do a noop delete + {{ noop_delete_statement }} + {{ '{% endif %}' }} + """ + ).render( + delete_statement=delete_statement, + noop_delete_statement=noop_delete_statement, + final_table_name=final_table_name, + unique_key=self.get_unique_key(in_jinja=False), + quoted_unique_key=self.get_unique_key(in_jinja=True), + active_row_column_name=active_row_column_name, + normalized_at_incremental_clause=self.get_incremental_clause_for_column( + "this.schema + '.' 
+ " + self.name_transformer.apply_quote(final_table_name), + self.get_normalized_at(in_jinja=True), + ), + unique_key_reference=unique_key_reference, + ) + hooks.append(deletion_hook) if self.destination_type.value == DestinationType.POSTGRES.value: # Keep only rows with the max emitted_at to keep incremental behavior From 05c560c0a60a8898f0e335f8f8eb13928a459bf7 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 10 Jun 2022 09:20:47 -0700 Subject: [PATCH 35/43] regenerate outputs --- .../test_nested_streams/dbt_project.yml | 2 - ..._columns_resulting_into_long_names_scd.sql | 2 +- ...resulting_into_long_names_scd_new_data.sql | 11 -- ..._columns_resulting_into_long_names_scd.sql | 99 ++++++++--------- ...resulting_into_long_names_scd_new_data.sql | 21 ---- ...resulting_into_long_names_scd_new_data.sql | 11 -- .../test_simple_streams/dbt_project.yml | 2 - .../test_simple_streams/first_dbt_project.yml | 6 -- .../dedup_exchange_rate_scd.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 11 -- .../dedup_exchange_rate_scd.sql | 101 ++++++++---------- .../dedup_exchange_rate_scd_new_data.sql | 21 ---- .../dedup_exchange_rate_scd.sql | 101 ++++++++---------- .../dedup_exchange_rate_scd_new_data.sql | 21 ---- .../dedup_exchange_rate_scd_new_data.sql | 11 -- .../dedup_exchange_rate_scd_new_data.sql | 11 -- .../test_simple_streams/dbt_project.yml | 6 -- .../dedup_cdc_excluded_scd.sql | 2 +- .../dedup_exchange_rate_scd.sql | 2 +- .../renamed_dedup_cdc_excluded_scd.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 12 --- .../dedup_cdc_excluded_scd.sql | 99 ++++++++--------- .../dedup_exchange_rate_scd.sql | 101 ++++++++---------- .../dedup_exchange_rate_scd_new_data.sql | 19 ---- .../dedup_exchange_rate_scd_new_data.sql | 12 --- .../mysql/test_nested_streams/dbt_project.yml | 2 - ..._stream_with_co_1g_into_long_names_scd.sql | 2 +- ..._stream_with_co_1ng_names_scd_new_data.sql | 9 -- ..._stream_with_co_1g_into_long_names_scd.sql | 99 ++++++++--------- 
..._stream_with_co_1ng_names_scd_new_data.sql | 19 ---- ..._stream_with_co_1g_into_long_names_scd.sql | 2 +- ..._stream_with_co_1ng_names_scd_new_data.sql | 9 -- .../mysql/test_simple_streams/dbt_project.yml | 6 -- .../dedup_exchange_rate_scd.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 9 -- .../dedup_exchange_rate_scd.sql | 101 ++++++++---------- .../dedup_exchange_rate_scd_new_data.sql | 19 ---- .../dedup_exchange_rate_scd.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 9 -- .../test_simple_streams/dbt_project.yml | 6 -- .../dedup_exchange_rate_scd.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 9 -- .../dedup_exchange_rate_scd.sql | 101 ++++++++---------- .../dedup_exchange_rate_scd_new_data.sql | 19 ---- .../dedup_exchange_rate_scd.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 9 -- .../test_nested_streams/dbt_project.yml | 2 - ...ream_with_c__lting_into_long_names_scd.sql | 2 +- .../some_stream_that_was_empty_scd.sql | 2 +- ..._c__lting_into_long_names_scd_new_data.sql | 13 --- ...ome_stream_that_was_empty_scd_new_data.sql | 13 --- ...ream_with_c__lting_into_long_names_scd.sql | 99 ++++++++--------- .../some_stream_that_was_empty_scd.sql | 99 ++++++++--------- ..._c__lting_into_long_names_scd_new_data.sql | 20 ---- ...ome_stream_that_was_empty_scd_new_data.sql | 20 ---- ..._c__lting_into_long_names_scd_new_data.sql | 15 --- ...ome_stream_that_was_empty_scd_new_data.sql | 15 --- .../test_simple_streams/dbt_project.yml | 2 - .../test_simple_streams/first_dbt_project.yml | 6 -- .../1_prefix_startwith_number_scd.sql | 2 +- .../dedup_cdc_excluded_scd.sql | 2 +- .../dedup_exchange_rate_scd.sql | 2 +- .../multiple_column_names_conflicts_scd.sql | 2 +- .../test_normalization/pos_dedup_cdcx_scd.sql | 2 +- .../renamed_dedup_cdc_excluded_scd.sql | 2 +- ...1_prefix_startwith_number_scd_new_data.sql | 13 --- .../dedup_cdc_excluded_scd_new_data.sql | 13 --- .../dedup_exchange_rate_scd_new_data.sql | 13 --- 
...le_column_names_conflicts_scd_new_data.sql | 13 --- .../pos_dedup_cdcx_scd_new_data.sql | 13 --- ...enamed_dedup_cdc_excluded_scd_new_data.sql | 13 --- .../1_prefix_startwith_number_scd.sql | 99 ++++++++--------- .../dedup_cdc_excluded_scd.sql | 99 ++++++++--------- .../dedup_exchange_rate_scd.sql | 101 ++++++++---------- .../multiple_column_names_conflicts_scd.sql | 99 ++++++++--------- .../test_normalization/pos_dedup_cdcx_scd.sql | 99 ++++++++--------- .../renamed_dedup_cdc_excluded_scd.sql | 99 ++++++++--------- ...1_prefix_startwith_number_scd_new_data.sql | 20 ---- .../dedup_cdc_excluded_scd_new_data.sql | 20 ---- .../dedup_exchange_rate_scd_new_data.sql | 20 ---- ...le_column_names_conflicts_scd_new_data.sql | 20 ---- .../pos_dedup_cdcx_scd_new_data.sql | 20 ---- ...enamed_dedup_cdc_excluded_scd_new_data.sql | 20 ---- .../dedup_exchange_rate_scd.sql | 101 ++++++++---------- .../renamed_dedup_cdc_excluded_scd.sql | 99 ++++++++--------- .../dedup_exchange_rate_scd_new_data.sql | 20 ---- ...enamed_dedup_cdc_excluded_scd_new_data.sql | 20 ---- ...1_prefix_startwith_number_scd_new_data.sql | 15 --- .../dedup_cdc_excluded_scd_new_data.sql | 15 --- .../dedup_exchange_rate_scd_new_data.sql | 15 --- ...le_column_names_conflicts_scd_new_data.sql | 15 --- .../pos_dedup_cdcx_scd_new_data.sql | 15 --- ...enamed_dedup_cdc_excluded_scd_new_data.sql | 15 --- .../dedup_exchange_rate_scd_new_data.sql | 15 --- ...enamed_dedup_cdc_excluded_scd_new_data.sql | 15 --- .../test_nested_streams/dbt_project.yml | 2 - ..._columns_resulting_into_long_names_scd.sql | 2 +- ...resulting_into_long_names_scd_new_data.sql | 10 -- ..._columns_resulting_into_long_names_scd.sql | 99 ++++++++--------- ...resulting_into_long_names_scd_new_data.sql | 20 ---- ...resulting_into_long_names_scd_new_data.sql | 10 -- .../test_simple_streams/dbt_project.yml | 2 - .../test_simple_streams/first_dbt_project.yml | 6 -- .../dedup_exchange_rate_scd.sql | 2 +- .../dedup_exchange_rate_scd_new_data.sql | 10 -- 
.../dedup_exchange_rate_scd.sql | 101 ++++++++---------- .../dedup_exchange_rate_scd_new_data.sql | 20 ---- .../dedup_exchange_rate_scd.sql | 101 ++++++++---------- .../dedup_exchange_rate_scd_new_data.sql | 20 ---- .../dedup_exchange_rate_scd_new_data.sql | 10 -- .../dedup_exchange_rate_scd_new_data.sql | 10 -- .../test_nested_streams/dbt_project.yml | 2 - ..._COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql | 2 +- ...RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql | 11 -- ..._COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql | 99 ++++++++--------- ...RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql | 20 ---- ...RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql | 11 -- .../test_simple_streams/dbt_project.yml | 6 -- .../DEDUP_EXCHANGE_RATE_SCD.sql | 2 +- .../DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql | 11 -- .../DEDUP_EXCHANGE_RATE_SCD.sql | 101 ++++++++---------- .../DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql | 20 ---- .../DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql | 11 -- 123 files changed, 1058 insertions(+), 2263 deletions(-) delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql delete mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql 
delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql delete mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql delete mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql delete mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql delete mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/dbt_project.yml index 2d19183efb914..68ca41b91d53f 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/dbt_project.yml @@ -52,7 +52,6 @@ vars: nested_stream_with_complex_columns_resulting_into_long_names_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names nested_stream_with_complex_columns_resulting_into_long_names_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names nested_stream_with_complex_columns_resulting_into_long_names_stg: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names nested_stream_with_complex_columns_resulting_into_long_names_scd: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names nested_stream_with_complex_columns_resulting_into_long_names: 
test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names non_nested_stream_without_namespace_resulting_into_long_names_ab1: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names @@ -62,7 +61,6 @@ vars: some_stream_that_was_empty_ab1: test_normalization._airbyte_raw_some_stream_that_was_empty some_stream_that_was_empty_ab2: test_normalization._airbyte_raw_some_stream_that_was_empty some_stream_that_was_empty_stg: test_normalization._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty_scd_new_data: test_normalization._airbyte_raw_some_stream_that_was_empty some_stream_that_was_empty_scd: test_normalization._airbyte_raw_some_stream_that_was_empty some_stream_that_was_empty: test_normalization._airbyte_raw_some_stream_that_was_empty simple_stream_with_namespace_resulting_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index eaaa727c2ab48..45aa53937e094 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -9,7 +9,7 @@ OPTIONS() as ( --- depends on: `dataline-integration-testing`._airbyte_test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data` +-- depends on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql deleted file mode 100644 index c31b4342f359b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql +++ /dev/null @@ -1,11 +0,0 @@ - - - create or replace view `dataline-integration-testing`._airbyte_test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data` - OPTIONS() - as --- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') - -select * from `dataline-integration-testing`._airbyte_test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_stg` - -; - diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index e186b869af04c..7ba778639fef4 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -4,73 +4,64 @@ unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='nested_stream_with_complex_columns_resulting_into_long_names' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. 
Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? --- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key - from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='nested_stream_with_complex_columns_resulting_into_long_names' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }}","drop view _airbyte_test_normalization.nested_stream_with_complex_columns_resulting_into_long_names_stg"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + ) and final_table._airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.nested_stream_with_complex_columns_resulting_into_long_names_stg"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} +-- depends on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} + -- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ 'id', ]) }} as _airbyte_unique_key - from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -84,7 +75,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( 
- select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} + select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from new_data union all select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql deleted file mode 100644 index c3d4e9d14cbde..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql +++ /dev/null @@ -1,21 +0,0 @@ -{{ config( - cluster_by = "_airbyte_emitted_at", - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} --- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', 
'_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} -{% else %} -select * from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} -{% endif %} -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql deleted file mode 100644 index c31b4342f359b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql +++ /dev/null @@ -1,11 +0,0 @@ - - - create or replace view `dataline-integration-testing`._airbyte_test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data` - OPTIONS() - as --- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') - -select * from `dataline-integration-testing`._airbyte_test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_stg` - -; - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/dbt_project.yml index de82a6ed3bfa8..daf19b9c9377c 100755 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/dbt_project.yml @@ -56,12 +56,10 @@ vars: dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd_new_data: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_dbt_project.yml index 7ae3dd92eace7..200e87ca5ea7c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_dbt_project.yml @@ -56,36 +56,30 @@ vars: 
dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd_new_data: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_scd_new_data: test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx 1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number 
1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number 1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_scd_new_data: test_normalization._airbyte_raw_1_prefix_startwith_number 1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number 1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_scd_new_data: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 1b35a3d334830..c8203853d54d2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -9,7 +9,7 @@ OPTIONS() as ( --- depends on: 
`dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_scd_new_data` +-- depends on: ref('dedup_exchange_rate_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index 1ee899b1ca7d4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,11 +0,0 @@ - - - create or replace view `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_scd_new_data` - OPTIONS() - as --- depends_on: ref('dedup_exchange_rate_stg') - -select * from `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg` - -; - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 87f60de08fac0..423aad4f8dff0 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -4,64 +4,53 @@ unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_exchange_rate' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. 
- delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? --- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'NZD', - ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view {{ ref('dedup_exchange_rate_scd_new_data') }}","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) and final_table._airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('dedup_exchange_rate_scd_new_data') }} +-- depends on: ref('dedup_exchange_rate_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct @@ -70,11 +59,11 @@ new_data_ids as ( 'currency', 'NZD', ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - 
select * from {{ ref('dedup_exchange_rate_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -88,7 +77,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from {{ ref('dedup_exchange_rate_scd_new_data') }} + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data union all select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index d4132ab83a4d3..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,21 +0,0 @@ -{{ config( - cluster_by = "_airbyte_emitted_at", - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('dedup_exchange_rate_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('dedup_exchange_rate_stg') }} --- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} -{% else %} -select * from {{ 
ref('dedup_exchange_rate_stg') }} -{% endif %} -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 9ad5fee82962f..c44384f3cda1d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -4,64 +4,53 @@ unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_exchange_rate' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. 
Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? --- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'NZD', - ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view {{ ref('dedup_exchange_rate_scd_new_data') }}","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) and final_table._airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('dedup_exchange_rate_scd_new_data') }} +-- depends on: ref('dedup_exchange_rate_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct @@ -70,11 +59,11 @@ new_data_ids as ( 'currency', 'NZD', ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ ref('dedup_exchange_rate_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -88,7 +77,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from {{ ref('dedup_exchange_rate_scd_new_data') }} + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data union all select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index d4132ab83a4d3..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,21 +0,0 @@ -{{ config( - cluster_by = "_airbyte_emitted_at", - partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('dedup_exchange_rate_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('dedup_exchange_rate_stg') }} --- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} -{% else %} -select * from {{ ref('dedup_exchange_rate_stg') }} -{% endif %} -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index 1ee899b1ca7d4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,11 +0,0 @@ - - - create 
or replace view `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_scd_new_data` - OPTIONS() - as --- depends_on: ref('dedup_exchange_rate_stg') - -select * from `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg` - -; - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index 1ee899b1ca7d4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,11 +0,0 @@ - - - create or replace view `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_scd_new_data` - OPTIONS() - as --- depends_on: ref('dedup_exchange_rate_stg') - -select * from `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg` - -; - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/dbt_project.yml index 954e2c759c6e2..02cf2fd559fd9 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/dbt_project.yml @@ -56,36 +56,30 @@ vars: dedup_exchange_rate_ab1: 
test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd_new_data: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_scd_new_data: test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx 1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number 1_prefix_startwith_number_ab2: 
test_normalization._airbyte_raw_1_prefix_startwith_number 1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_scd_new_data: test_normalization._airbyte_raw_1_prefix_startwith_number 1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number 1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_scd_new_data: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 5b82ef6e38ba7..5bdb0dc1df913 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -12,7 +12,7 @@ as ( --- depends on: 
_airbyte_test_normalization.dedup_cdc_excluded_scd_new_data +-- depends on: ref('dedup_cdc_excluded_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 12dbba25a36d7..bd961b05351de 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -12,7 +12,7 @@ as ( --- depends on: _airbyte_test_normalization.dedup_exchange_rate_scd_new_data +-- depends on: ref('dedup_exchange_rate_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 018832cbb8cde..682ba8ff69576 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -12,7 +12,7 @@ as ( --- depends on: _airbyte_test_normalization.renamed_dedup_cdc_excluded_scd_new_data +-- depends on: ref('renamed_dedup_cdc_excluded_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index 8a42ce8df218b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,12 +0,0 @@ - - - create view _airbyte_test_normalization.dedup_exchange_rate_scd_new_data__dbt_tmp - - as ( - --- depends_on: ref('dedup_exchange_rate_stg') - -select * from _airbyte_test_normalization.dedup_exchange_rate_stg - - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 0177c40f52db6..4e6f8d2a11928 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -2,73 +2,64 @@ unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_cdc_excluded' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). 
- -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - alter table {{ final_table_relation }} delete where _airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? --- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key - from {{ ref('dedup_cdc_excluded_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + quote('dedup_cdc_excluded')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_cdc_excluded' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - alter table {{ this }} delete where 1=0 - {% endif %} - ","drop view {{ ref('dedup_cdc_excluded_scd_new_data') }}","drop view _airbyte_test_normalization.dedup_cdc_excluded_stg"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + alter table {{ final_table_relation }} delete where _airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_cdc_excluded')) }} + ) and _airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_cdc_excluded')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + alter table {{ this }} delete where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.dedup_cdc_excluded_stg"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('dedup_cdc_excluded_scd_new_data') }} +-- depends on: ref('dedup_cdc_excluded_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_cdc_excluded_stg') }} + -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ 'id', ]) }} as _airbyte_unique_key - from {{ ref('dedup_cdc_excluded_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ ref('dedup_cdc_excluded_scd_new_data') }} where 1 = 
0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -82,7 +73,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from {{ ref('dedup_cdc_excluded_scd_new_data') }} + select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from new_data union all select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 604ecef56f7df..3490631ea19bf 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -2,64 +2,53 @@ unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_exchange_rate' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - alter table {{ final_table_relation }} delete where _airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? --- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'NZD', - ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ quote('dedup_exchange_rate')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - alter table {{ this }} delete where 1=0 - {% endif %} - ","drop view {{ ref('dedup_exchange_rate_scd_new_data') }}","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + alter table {{ final_table_relation }} delete where _airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_exchange_rate')) }} + ) and _airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ quote('dedup_exchange_rate')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + alter table {{ this }} delete where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('dedup_exchange_rate_scd_new_data') }} +-- depends on: ref('dedup_exchange_rate_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct @@ -68,11 +57,11 @@ new_data_ids as ( 'currency', 'NZD', ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ ref('dedup_exchange_rate_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -86,7 +75,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from {{ ref('dedup_exchange_rate_scd_new_data') }} + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data union all select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index da8774679c71f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('dedup_exchange_rate_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('dedup_exchange_rate_stg') }} --- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} -{% else %} -select * from {{ ref('dedup_exchange_rate_stg') }} -{% endif %} -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index 8a42ce8df218b..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,12 +0,0 @@ - - - create view _airbyte_test_normalization.dedup_exchange_rate_scd_new_data__dbt_tmp - - as ( - --- depends_on: ref('dedup_exchange_rate_stg') - -select * 
from _airbyte_test_normalization.dedup_exchange_rate_stg - - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/dbt_project.yml index 3854bb416d73e..937a423ec05dd 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/dbt_project.yml @@ -48,7 +48,6 @@ vars: nested_stream_with_co_1g_into_long_names_ab1: test_normalization._airbyte_raw_nested_s__lting_into_long_names nested_stream_with_co_1g_into_long_names_ab2: test_normalization._airbyte_raw_nested_s__lting_into_long_names nested_stream_with_co_1g_into_long_names_stg: test_normalization._airbyte_raw_nested_s__lting_into_long_names - nested_stream_with_co_1ng_names_scd_new_data: test_normalization._airbyte_raw_nested_s__lting_into_long_names nested_stream_with_co_1g_into_long_names_scd: test_normalization._airbyte_raw_nested_s__lting_into_long_names nested_stream_with_co__lting_into_long_names: test_normalization._airbyte_raw_nested_s__lting_into_long_names non_nested_stream_wit_1g_into_long_names_ab1: test_normalization._airbyte_raw_non_nest__lting_into_long_names @@ -58,7 +57,6 @@ vars: some_stream_that_was_empty_ab1: test_normalization._airbyte_raw_some_stream_that_was_empty some_stream_that_was_empty_ab2: test_normalization._airbyte_raw_some_stream_that_was_empty some_stream_that_was_empty_stg: test_normalization._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty_scd_new_data: test_normalization._airbyte_raw_some_stream_that_was_empty some_stream_that_was_empty_scd: test_normalization._airbyte_raw_some_stream_that_was_empty 
some_stream_that_was_empty: test_normalization._airbyte_raw_some_stream_that_was_empty simple_stream_with_na_1g_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_s__lting_into_long_names diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index a78a228cfe085..d36afc59a4fa2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -4,7 +4,7 @@ test_normalization.`nested_stream_with_co_1g_into_long_names_scd__dbt_tmp` as ( --- depends on: _airbyte_test_normalization.`nested_stream_with_co_1ng_names_scd_new_data` +-- depends on: ref('nested_stream_with_co_1g_into_long_names_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql deleted file mode 100644 index cadcb57e95c57..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql +++ /dev/null @@ -1,9 +0,0 @@ - - create view _airbyte_test_normalization.`nested_stream_with_co_1ng_names_scd_new_data__dbt_tmp` as ( - --- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') - -select * from _airbyte_test_normalization.`nested_stream_with_co_1g_into_long_names_stg` - - - ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index ac8c5c00a968b..1768d23641384 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -2,73 +2,64 @@ unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='nested_stream_with_co__lting_into_long_names' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. 
- Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? 
--- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key - from {{ ref('nested_stream_with_co_1ng_names_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='nested_stream_with_co__lting_into_long_names' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view {{ ref('nested_stream_with_co_1ng_names_scd_new_data') }}","drop view _airbyte_test_normalization.nested_stream_with_co_1g_into_long_names_stg"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + ) and {{ final_table_relation }}._airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.nested_stream_with_co_1g_into_long_names_stg"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('nested_stream_with_co_1ng_names_scd_new_data') }} +-- depends on: ref('nested_stream_with_co_1g_into_long_names_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('nested_stream_with_co_1g_into_long_names_stg') }} + -- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ 'id', ]) }} as _airbyte_unique_key - from {{ ref('nested_stream_with_co_1ng_names_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ ref('nested_stream_with_co_1ng_names_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -82,7 +73,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('nested_stream_with_co_1g_into_long_names_stg')) }} from {{ ref('nested_stream_with_co_1ng_names_scd_new_data') }} + select {{ dbt_utils.star(ref('nested_stream_with_co_1g_into_long_names_stg')) }} from new_data union all select {{ dbt_utils.star(ref('nested_stream_with_co_1g_into_long_names_stg')) }} from previous_active_scd_data ), diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql deleted file mode 100644 index f240e35e6d7dd..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('nested_stream_with_co_1g_into_long_names_stg') }} --- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} -{% else %} -select * from {{ ref('nested_stream_with_co_1g_into_long_names_stg') }} -{% endif %} -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index a78a228cfe085..d36afc59a4fa2 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -4,7 +4,7 @@ test_normalization.`nested_stream_with_co_1g_into_long_names_scd__dbt_tmp` as ( --- depends on: _airbyte_test_normalization.`nested_stream_with_co_1ng_names_scd_new_data` +-- depends on: ref('nested_stream_with_co_1g_into_long_names_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql deleted file mode 100644 index cadcb57e95c57..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_co_1ng_names_scd_new_data.sql +++ /dev/null @@ -1,9 +0,0 @@ - - create view _airbyte_test_normalization.`nested_stream_with_co_1ng_names_scd_new_data__dbt_tmp` as ( - --- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') - -select * from _airbyte_test_normalization.`nested_stream_with_co_1g_into_long_names_stg` - - - ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/dbt_project.yml 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/dbt_project.yml index e47faa78dc483..bddbc9e03a806 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/dbt_project.yml @@ -52,36 +52,30 @@ vars: dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd_new_data: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded pos_dedup_cdcx_ab1: 
test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_scd_new_data: test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx 1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number 1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number 1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_scd_new_data: test_normalization._airbyte_raw_1_prefix_startwith_number 1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number 1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_scd_new_data: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 98f20af5708e7..f837bab8d5381 100644 
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -4,7 +4,7 @@ test_normalization.`dedup_exchange_rate_scd__dbt_tmp` as ( --- depends on: _airbyte_test_normalization.`dedup_exchange_rate_scd_new_data` +-- depends on: ref('dedup_exchange_rate_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index 9acd13c900265..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,9 +0,0 @@ - - create view _airbyte_test_normalization.`dedup_exchange_rate_scd_new_data__dbt_tmp` as ( - --- depends_on: ref('dedup_exchange_rate_stg') - -select * from _airbyte_test_normalization.`dedup_exchange_rate_stg` - - - ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 
80bc481d8d172..365b40479baa3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -2,64 +2,53 @@ unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_exchange_rate' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. 
- -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? --- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view {{ ref('dedup_exchange_rate_scd_new_data') }}","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) and {{ final_table_relation }}._airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('dedup_exchange_rate_scd_new_data') }} +-- depends on: ref('dedup_exchange_rate_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct @@ -68,11 +57,11 @@ new_data_ids as ( 'currency', 'nzd', ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the 
table's column types - select * from {{ ref('dedup_exchange_rate_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -86,7 +75,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from {{ ref('dedup_exchange_rate_scd_new_data') }} + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data union all select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index da8774679c71f..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('dedup_exchange_rate_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('dedup_exchange_rate_stg') }} --- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} -{% else %} -select * from {{ ref('dedup_exchange_rate_stg') }} -{% endif %} -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 98f20af5708e7..f837bab8d5381 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -4,7 +4,7 @@ test_normalization.`dedup_exchange_rate_scd__dbt_tmp` as ( --- depends on: _airbyte_test_normalization.`dedup_exchange_rate_scd_new_data` +-- depends on: ref('dedup_exchange_rate_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index 9acd13c900265..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,9 +0,0 @@ - - create view _airbyte_test_normalization.`dedup_exchange_rate_scd_new_data__dbt_tmp` as ( - --- depends_on: ref('dedup_exchange_rate_stg') - -select * from 
_airbyte_test_normalization.`dedup_exchange_rate_stg` - - - ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/dbt_project.yml index 9705c3694a774..a696787c00ab2 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/dbt_project.yml @@ -52,36 +52,30 @@ vars: dedup_exchange_rate_ab1: test_normalization.airbyte_raw_dedup_exchange_rate dedup_exchange_rate_ab2: test_normalization.airbyte_raw_dedup_exchange_rate dedup_exchange_rate_stg: test_normalization.airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd_new_data: test_normalization.airbyte_raw_dedup_exchange_rate dedup_exchange_rate_scd: test_normalization.airbyte_raw_dedup_exchange_rate dedup_exchange_rate: test_normalization.airbyte_raw_dedup_exchange_rate renamed_dedup_cdc_excluded_ab1: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_ab2: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_stg: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd_new_data: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_scd: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded dedup_cdc_excluded_ab1: test_normalization.airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded_ab2: test_normalization.airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded_stg: test_normalization.airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_scd_new_data: 
test_normalization.airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded_scd: test_normalization.airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded: test_normalization.airbyte_raw_dedup_cdc_excluded pos_dedup_cdcx_ab1: test_normalization.airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx_ab2: test_normalization.airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx_stg: test_normalization.airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_scd_new_data: test_normalization.airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx_scd: test_normalization.airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx: test_normalization.airbyte_raw_pos_dedup_cdcx ab_1_prefix_startwith_number_ab1: test_normalization.airbyte_raw_1_prefix_startwith_number ab_1_prefix_startwith_number_ab2: test_normalization.airbyte_raw_1_prefix_startwith_number ab_1_prefix_startwith_number_stg: test_normalization.airbyte_raw_1_prefix_startwith_number - ab_1_prefix_startwith_number_scd_new_data: test_normalization.airbyte_raw_1_prefix_startwith_number ab_1_prefix_startwith_number_scd: test_normalization.airbyte_raw_1_prefix_startwith_number ab_1_prefix_startwith_number: test_normalization.airbyte_raw_1_prefix_startwith_number multiple_column_names_conflicts_ab1: test_normalization.airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts_ab2: test_normalization.airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts_stg: test_normalization.airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_scd_new_data: test_normalization.airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts_scd: test_normalization.airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts: test_normalization.airbyte_raw_multiple_column_names_conflicts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 91f927adaeb64..98527f50e0a17 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -4,7 +4,7 @@ as --- depends on: test_normalization.dedup_exchange_rate_scd_new_data +-- depends on: ref('dedup_exchange_rate_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index e0c751a0dadfe..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,9 +0,0 @@ - - create view test_normalization.dedup_exchange_rate_scd_new_data__dbt_tmp as - --- depends_on: ref('dedup_exchange_rate_stg') - -select * from test_normalization.dedup_exchange_rate_stg - - - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 806156cfa0dcb..a1458a7dac3ad 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -2,64 +2,53 @@ unique_key = "{{ quote('_AIRBYTE_UNIQUE_KEY_SCD') }}", schema = "test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_exchange_rate' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and quote('_AIRBYTE_UNIQUE_KEY') in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). 
- -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - delete from {{ final_table_relation }} where {{ final_table_relation }}.{{ quote('_AIRBYTE_UNIQUE_KEY') }} in ( - select modified_ids.{{ quote('_AIRBYTE_UNIQUE_KEY') }} - from - ( - select nullif(scd_table.{{ quote('_AIRBYTE_UNIQUE_KEY') }}, '') as {{ quote('_AIRBYTE_UNIQUE_KEY') }} from {{ this }} scd_table --- TODO is this even necessary? --- inner join modified_ids on scd_table.{{ quote('_AIRBYTE_UNIQUE_KEY') }} = modified_ids.{{ quote('_AIRBYTE_UNIQUE_KEY') }} - where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', - ]) }} as {{ quote('_AIRBYTE_UNIQUE_KEY') }} - from {{ ref('dedup_exchange_rate_scd_new_data') }} - where 1=1 - {{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this.schema + '.' 
+ quote('dedup_exchange_rate')) }} - ) modified_ids - on modified_ids.{{ quote('_AIRBYTE_UNIQUE_KEY') }} = scd_active_rows.{{ quote('_AIRBYTE_UNIQUE_KEY') }} - group by modified_ids.{{ quote('_AIRBYTE_UNIQUE_KEY') }} - having count(scd_active_rows.{{ quote('_AIRBYTE_UNIQUE_KEY') }}) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view {{ ref('dedup_exchange_rate_scd_new_data') }}","drop view test_normalization.dedup_exchange_rate_stg"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and quote('_AIRBYTE_UNIQUE_KEY') in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} where {{ final_table_relation }}.{{ quote('_AIRBYTE_UNIQUE_KEY') }} in ( + select {{ quote('_AIRBYTE_UNIQUE_KEY') }} + from {{ this }} + where 1 = 1 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), this.schema + '.' 
+ quote('dedup_exchange_rate')) }} + ) and {{ final_table_relation }}.{{ quote('_AIRBYTE_UNIQUE_KEY') }} not in ( + select {{ quote('_AIRBYTE_UNIQUE_KEY') }} + from {{ this }} + where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), this.schema + '.' + quote('dedup_exchange_rate')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('dedup_exchange_rate_scd_new_data') }} +-- depends on: ref('dedup_exchange_rate_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', 'airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this) }} +), new_data_ids as ( -- build a subset of {{ quote('_AIRBYTE_UNIQUE_KEY') }} from rows that are new select distinct @@ -68,11 +57,11 @@ new_data_ids as ( 'currency', 'nzd', ]) }} as {{ quote('_AIRBYTE_UNIQUE_KEY') }} - from {{ ref('dedup_exchange_rate_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ ref('dedup_exchange_rate_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -86,7 +75,7 @@ previous_active_scd_data as ( where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from {{ ref('dedup_exchange_rate_scd_new_data') }} + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data union all select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data ), 
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index a52c371a98ec6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,19 +0,0 @@ -{{ config( - unique_key = quote('_AIRBYTE_AB_ID'), - schema = "test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('dedup_exchange_rate_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('dedup_exchange_rate_stg') }} --- dedup_exchange_rate from {{ source('test_normalization', 'airbyte_raw_dedup_exchange_rate') }} -where 1 = 1 -{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this) }} -{% else %} -select * from {{ ref('dedup_exchange_rate_stg') }} -{% endif %} -{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 91f927adaeb64..98527f50e0a17 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -4,7 +4,7 @@ as --- depends on: test_normalization.dedup_exchange_rate_scd_new_data +-- depends on: ref('dedup_exchange_rate_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index e0c751a0dadfe..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,9 +0,0 @@ - - create view test_normalization.dedup_exchange_rate_scd_new_data__dbt_tmp as - --- depends_on: ref('dedup_exchange_rate_stg') - -select * from test_normalization.dedup_exchange_rate_stg - - - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/dbt_project.yml index d2abaa759df50..fa54af3b1a086 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/dbt_project.yml +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/dbt_project.yml @@ -52,7 +52,6 @@ vars: nested_stream_with_c__lting_into_long_names_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names nested_stream_with_c__lting_into_long_names_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names nested_stream_with_c__lting_into_long_names_stg: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_c__lting_into_long_names_scd_new_data: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names nested_stream_with_c__lting_into_long_names_scd: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names nested_stream_with_c__lting_into_long_names: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names non_nested_stream_wi__lting_into_long_names_ab1: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names @@ -62,7 +61,6 @@ vars: some_stream_that_was_empty_ab1: test_normalization._airbyte_raw_some_stream_that_was_empty some_stream_that_was_empty_ab2: test_normalization._airbyte_raw_some_stream_that_was_empty some_stream_that_was_empty_stg: test_normalization._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty_scd_new_data: test_normalization._airbyte_raw_some_stream_that_was_empty some_stream_that_was_empty_scd: test_normalization._airbyte_raw_some_stream_that_was_empty some_stream_that_was_empty: test_normalization._airbyte_raw_some_stream_that_was_empty simple_stream_with_n__lting_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql index 2c023fc2cf413..fa54425e1d55f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."nested_stream_with_c__lting_into_long_names_scd" as ( --- depends on: "postgres"._airbyte_test_normalization."nested_stream_with_c__lting_into_long_names_scd_new_data" +-- depends on: ref('nested_stream_with_c__lting_into_long_names_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql index 9864ea8e0bf21..e409c2ecd698f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."some_stream_that_was_empty_scd" as ( --- depends on: "postgres"._airbyte_test_normalization."some_stream_that_was_empty_scd_new_data" +-- depends on: ref('some_stream_that_was_empty_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql deleted file mode 100644 index 678a936379361..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql +++ /dev/null @@ -1,13 +0,0 @@ - - - - create table "postgres"._airbyte_test_normalization."nested_stream_with_c__lting_into_long_names_scd_new_data" - as ( - --- depends_on: ref('nested_stream_with_c__lting_into_long_names_stg') - -select * from "postgres"._airbyte_test_normalization."nested_stream_with_c__lting_into_long_names_stg" - - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql deleted file mode 100644 index 061fe986f6fcf..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql +++ /dev/null @@ -1,13 +0,0 @@ - - - - create table "postgres"._airbyte_test_normalization."some_stream_that_was_empty_scd_new_data" - as ( - --- depends_on: ref('some_stream_that_was_empty_stg') - -select * from "postgres"._airbyte_test_normalization."some_stream_that_was_empty_stg" - - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql index ee75e2052c9de..acaa68ee0beab 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql @@ -3,73 +3,64 @@ unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - 
database=this.database, - schema=this.schema, - identifier='nested_stream_with_c__lting_into_long_names' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? 
--- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from {{ ref('nested_stream_with_c__lting_into_long_names_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('nested_stream_with_c__lting_into_long_names')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='nested_stream_with_c__lting_into_long_names' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from {{ ref('nested_stream_with_c__lting_into_long_names_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('nested_stream_with_c__lting_into_long_names_scd_new_data') }})","delete from _airbyte_test_normalization.nested_stream_with_c__lting_into_long_names_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.nested_stream_with_c__lting_into_long_names_stg)"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_c__lting_into_long_names')) }} + ) and {{ final_table_relation }}._airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_c__lting_into_long_names')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.nested_stream_with_c__lting_into_long_names_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.nested_stream_with_c__lting_into_long_names_stg)"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('nested_stream_with_c__lting_into_long_names_scd_new_data') }} +-- depends on: ref('nested_stream_with_c__lting_into_long_names_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('nested_stream_with_c__lting_into_long_names_stg') }} + -- nested_stream_with_c__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ adapter.quote('id'), ]) }} as _airbyte_unique_key - from {{ ref('nested_stream_with_c__lting_into_long_names_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ ref('nested_stream_with_c__lting_into_long_names_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -83,7 +74,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('nested_stream_with_c__lting_into_long_names_stg')) }} from {{ ref('nested_stream_with_c__lting_into_long_names_scd_new_data') }} + select {{ dbt_utils.star(ref('nested_stream_with_c__lting_into_long_names_stg')) }} from 
new_data union all select {{ dbt_utils.star(ref('nested_stream_with_c__lting_into_long_names_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql index 82e0d9c3b48c4..eb339041cba53 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql @@ -3,73 +3,64 @@ unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='some_stream_that_was_empty' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. 
Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? --- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from {{ ref('some_stream_that_was_empty_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('some_stream_that_was_empty')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='some_stream_that_was_empty' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from {{ ref('some_stream_that_was_empty_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('some_stream_that_was_empty_scd_new_data') }})","delete from _airbyte_test_normalization.some_stream_that_was_empty_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.some_stream_that_was_empty_stg)"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('some_stream_that_was_empty')) }} + ) and {{ final_table_relation }}._airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('some_stream_that_was_empty')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.some_stream_that_was_empty_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.some_stream_that_was_empty_stg)"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('some_stream_that_was_empty_scd_new_data') }} +-- depends on: ref('some_stream_that_was_empty_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('some_stream_that_was_empty_stg') }} + -- some_stream_that_was_empty from {{ source('test_normalization', '_airbyte_raw_some_stream_that_was_empty') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ adapter.quote('id'), ]) }} as _airbyte_unique_key - from {{ ref('some_stream_that_was_empty_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ ref('some_stream_that_was_empty_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -83,7 +74,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('some_stream_that_was_empty_stg')) }} from {{ ref('some_stream_that_was_empty_scd_new_data') }} + select {{ 
dbt_utils.star(ref('some_stream_that_was_empty_stg')) }} from new_data union all select {{ dbt_utils.star(ref('some_stream_that_was_empty_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql deleted file mode 100644 index d148a2b0c5033..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('nested_stream_with_c__lting_into_long_names_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('nested_stream_with_c__lting_into_long_names_stg') }} --- nested_stream_with_c__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} -{% else %} -select * from {{ ref('nested_stream_with_c__lting_into_long_names_stg') }} -{% endif %} -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql deleted file mode 100644 index 90f82517a2105..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('some_stream_that_was_empty_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('some_stream_that_was_empty_stg') }} --- some_stream_that_was_empty from {{ source('test_normalization', '_airbyte_raw_some_stream_that_was_empty') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} -{% else %} -select * from {{ ref('some_stream_that_was_empty_stg') }} -{% endif %} -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql deleted file mode 100644 index 68cf364dae0c2..0000000000000 
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_scd_new_data.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."nested_stream_with_c__lting_into_long_names_scd_new_data" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "nested_stream_with_c__lting_into_long_name__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."nested_stream_with_c__lting_into_long_names_scd_new_data" ("_airbyte_nested_stre__nto_long_names_hashid", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_nested_stre__nto_long_names_hashid", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "nested_stream_with_c__lting_into_long_name__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql deleted file mode 100644 index 2c2743107273e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_scd_new_data.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."some_stream_that_was_empty_scd_new_data" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "some_stream_that_was_empty_scd_new_data__dbt_tmp" - ); - - - 
insert into "postgres"._airbyte_test_normalization."some_stream_that_was_empty_scd_new_data" ("_airbyte_some_stream_that_was_empty_hashid", "id", "date", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_some_stream_that_was_empty_hashid", "id", "date", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "some_stream_that_was_empty_scd_new_data__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml index de82a6ed3bfa8..daf19b9c9377c 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml @@ -56,12 +56,10 @@ vars: dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd_new_data: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_scd: 
test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml index 7ae3dd92eace7..200e87ca5ea7c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml @@ -56,36 +56,30 @@ vars: dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd_new_data: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded 
dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_scd_new_data: test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx 1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number 1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number 1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_scd_new_data: test_normalization._airbyte_raw_1_prefix_startwith_number 1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number 1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_scd_new_data: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql index 4631e0da21e19..8f2383599fe25 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."1_prefix_startwith_number_scd" as ( --- depends on: "postgres"._airbyte_test_normalization."1_prefix_startwith_number_scd_new_data" +-- depends on: ref('1_prefix_startwith_number_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 58751dbb4ce37..310dc1101d1b6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."dedup_cdc_excluded_scd" as ( --- depends on: "postgres"._airbyte_test_normalization."dedup_cdc_excluded_scd_new_data" +-- depends on: ref('dedup_cdc_excluded_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index abdce6b556e5b..b152c3250288a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."dedup_exchange_rate_scd" as ( --- depends on: "postgres"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data" +-- depends on: ref('dedup_exchange_rate_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql index b40a2b6ba7b63..0066be3673f26 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."multiple_column_names_conflicts_scd" as ( --- depends on: "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_scd_new_data" +-- depends on: ref('multiple_column_names_conflicts_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql index e6e68674b9cf2..354a81774bcfa 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."pos_dedup_cdcx_scd" as ( --- depends on: 
"postgres"._airbyte_test_normalization."pos_dedup_cdcx_scd_new_data" +-- depends on: ref('pos_dedup_cdcx_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 2dc86c7a971f8..1b7f31846576a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" as ( --- depends on: "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_scd_new_data" +-- depends on: ref('renamed_dedup_cdc_excluded_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql deleted file mode 100644 index dac4eacc8fed9..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql +++ /dev/null @@ -1,13 +0,0 @@ - - - - create table "postgres"._airbyte_test_normalization."1_prefix_startwith_number_scd_new_data" - as ( - --- depends_on: ref('1_prefix_startwith_number_stg') - -select * from "postgres"._airbyte_test_normalization."1_prefix_startwith_number_stg" - - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql deleted file mode 100644 index 7b4aed326b37e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql +++ /dev/null @@ -1,13 +0,0 @@ - - - - create table "postgres"._airbyte_test_normalization."dedup_cdc_excluded_scd_new_data" - as ( - --- depends_on: ref('dedup_cdc_excluded_stg') - -select * from "postgres"._airbyte_test_normalization."dedup_cdc_excluded_stg" - - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 
index 4b181b8a132e8..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,13 +0,0 @@ - - - - create table "postgres"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data" - as ( - --- depends_on: ref('dedup_exchange_rate_stg') - -select * from "postgres"._airbyte_test_normalization."dedup_exchange_rate_stg" - - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql deleted file mode 100644 index c748f1f836ec1..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql +++ /dev/null @@ -1,13 +0,0 @@ - - - - create table "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_scd_new_data" - as ( - --- depends_on: ref('multiple_column_names_conflicts_stg') - -select * from "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_stg" - - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql deleted file mode 100644 index 59e475b062bfb..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql +++ /dev/null @@ -1,13 +0,0 @@ - - - - create table "postgres"._airbyte_test_normalization."pos_dedup_cdcx_scd_new_data" - as ( - --- depends_on: ref('pos_dedup_cdcx_stg') - -select * from "postgres"._airbyte_test_normalization."pos_dedup_cdcx_stg" - - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql deleted file mode 100644 index 2aa7f2b541197..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql +++ /dev/null @@ -1,13 +0,0 @@ - - - - create table "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_scd_new_data" - as ( - --- depends_on: ref('renamed_dedup_cdc_excluded_stg') - -select * from "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" - - - ); - \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql index 51b475868a529..b5329744ec12e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql @@ -3,73 +3,64 @@ unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='1_prefix_startwith_number' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. 
Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? --- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from {{ ref('1_prefix_startwith_number_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('1_prefix_startwith_number')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='1_prefix_startwith_number' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from {{ ref('1_prefix_startwith_number_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('1_prefix_startwith_number_scd_new_data') }})","delete from _airbyte_test_normalization.{{ adapter.quote('1_prefix_startwith_number_stg') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.{{ adapter.quote('1_prefix_startwith_number_stg') }})"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('1_prefix_startwith_number')) }} + ) and {{ final_table_relation }}._airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('1_prefix_startwith_number')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.{{ adapter.quote('1_prefix_startwith_number_stg') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.{{ adapter.quote('1_prefix_startwith_number_stg') }})"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('1_prefix_startwith_number_scd_new_data') }} +-- depends on: ref('1_prefix_startwith_number_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('1_prefix_startwith_number_stg') }} + -- 1_prefix_startwith_number from {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ adapter.quote('id'), ]) }} as _airbyte_unique_key - from {{ ref('1_prefix_startwith_number_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ ref('1_prefix_startwith_number_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -83,7 +74,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('1_prefix_startwith_number_stg')) }} from {{ ref('1_prefix_startwith_number_scd_new_data') }} + select {{ 
dbt_utils.star(ref('1_prefix_startwith_number_stg')) }} from new_data union all select {{ dbt_utils.star(ref('1_prefix_startwith_number_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 4e1c287d8ca17..8b351420e27b9 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -3,73 +3,64 @@ unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_cdc_excluded' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. 
Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? --- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from {{ ref('dedup_cdc_excluded_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_cdc_excluded')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_cdc_excluded' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from {{ ref('dedup_cdc_excluded_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('dedup_cdc_excluded_scd_new_data') }})","delete from _airbyte_test_normalization.dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_cdc_excluded_stg)"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_cdc_excluded')) }} + ) and {{ final_table_relation }}._airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_cdc_excluded_stg)"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('dedup_cdc_excluded_scd_new_data') }} +-- depends on: ref('dedup_cdc_excluded_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_cdc_excluded_stg') }} + -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ adapter.quote('id'), ]) }} as _airbyte_unique_key - from {{ ref('dedup_cdc_excluded_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ ref('dedup_cdc_excluded_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -83,7 +74,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from {{ ref('dedup_cdc_excluded_scd_new_data') }} + select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from new_data union all select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from 
previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index cc1601f722be5..8ee9d8a0bd067 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -3,64 +3,53 @@ unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_exchange_rate' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. 
Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? --- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from {{ ref('dedup_exchange_rate_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('dedup_exchange_rate_scd_new_data') }})","delete from _airbyte_test_normalization.dedup_exchange_rate_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_exchange_rate_stg)"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + ) and {{ final_table_relation }}._airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.dedup_exchange_rate_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_exchange_rate_stg)"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('dedup_exchange_rate_scd_new_data') }} +-- depends on: ref('dedup_exchange_rate_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct @@ -69,11 +58,11 @@ new_data_ids as ( 'currency', 'nzd', ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ ref('dedup_exchange_rate_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -87,7 +76,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from {{ ref('dedup_exchange_rate_scd_new_data') }} + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data union all select {{ 
dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql index cd39fa7363bc6..7a091b64723f0 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql @@ -3,73 +3,64 @@ unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='multiple_column_names_conflicts' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. 
Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? --- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from {{ ref('multiple_column_names_conflicts_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('multiple_column_names_conflicts')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='multiple_column_names_conflicts' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from {{ ref('multiple_column_names_conflicts_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('multiple_column_names_conflicts_scd_new_data') }})","delete from _airbyte_test_normalization.multiple_column_names_conflicts_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.multiple_column_names_conflicts_stg)"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('multiple_column_names_conflicts')) }} + ) and {{ final_table_relation }}._airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('multiple_column_names_conflicts')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.multiple_column_names_conflicts_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.multiple_column_names_conflicts_stg)"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('multiple_column_names_conflicts_scd_new_data') }} +-- depends on: ref('multiple_column_names_conflicts_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('multiple_column_names_conflicts_stg') }} + -- multiple_column_names_conflicts from {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ adapter.quote('id'), ]) }} as _airbyte_unique_key - from {{ ref('multiple_column_names_conflicts_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ ref('multiple_column_names_conflicts_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -83,7 +74,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('multiple_column_names_conflicts_stg')) }} from {{ 
ref('multiple_column_names_conflicts_scd_new_data') }} + select {{ dbt_utils.star(ref('multiple_column_names_conflicts_stg')) }} from new_data union all select {{ dbt_utils.star(ref('multiple_column_names_conflicts_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql index 32f8470144da1..fa89120de0a47 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -3,73 +3,64 @@ unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='pos_dedup_cdcx' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. 
Find the records which are being updated by querying the _scd_new_data model - -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? --- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from {{ ref('pos_dedup_cdcx_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('pos_dedup_cdcx')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='pos_dedup_cdcx' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from {{ ref('pos_dedup_cdcx_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('pos_dedup_cdcx_scd_new_data') }})","delete from _airbyte_test_normalization.pos_dedup_cdcx_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.pos_dedup_cdcx_stg)"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('pos_dedup_cdcx')) }} + ) and {{ final_table_relation }}._airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('pos_dedup_cdcx')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.pos_dedup_cdcx_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.pos_dedup_cdcx_stg)"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('pos_dedup_cdcx_scd_new_data') }} +-- depends on: ref('pos_dedup_cdcx_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('pos_dedup_cdcx_stg') }} + -- pos_dedup_cdcx from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ adapter.quote('id'), ]) }} as _airbyte_unique_key - from {{ ref('pos_dedup_cdcx_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ ref('pos_dedup_cdcx_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -83,7 +74,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('pos_dedup_cdcx_stg')) }} from {{ ref('pos_dedup_cdcx_scd_new_data') }} + select {{ dbt_utils.star(ref('pos_dedup_cdcx_stg')) }} from new_data union all select {{ dbt_utils.star(ref('pos_dedup_cdcx_stg')) }} from previous_active_scd_data ), diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index a1942adb4aabf..97850b8cf2fc0 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -3,73 +3,64 @@ unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='renamed_dedup_cdc_excluded' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. 
Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? --- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('renamed_dedup_cdc_excluded')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='renamed_dedup_cdc_excluded' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }})","delete from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg)"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('renamed_dedup_cdc_excluded')) }} + ) and {{ final_table_relation }}._airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg)"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} +-- depends on: ref('renamed_dedup_cdc_excluded_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('renamed_dedup_cdc_excluded_stg') }} + -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ adapter.quote('id'), ]) }} as _airbyte_unique_key - from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -83,7 +74,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} + select {{ 
dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from new_data union all select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql deleted file mode 100644 index dbc21ce7b3ca8..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('1_prefix_startwith_number_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('1_prefix_startwith_number_stg') }} --- 1_prefix_startwith_number from {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} -{% else %} -select * from {{ ref('1_prefix_startwith_number_stg') }} -{% endif %} -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql deleted file mode 100644 index 865791db29978..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('dedup_cdc_excluded_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('dedup_cdc_excluded_stg') }} --- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} -{% else %} -select * from {{ ref('dedup_cdc_excluded_stg') }} -{% endif %} -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index b8dc781f16dc7..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - indexes = 
[{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('dedup_exchange_rate_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('dedup_exchange_rate_stg') }} --- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} -{% else %} -select * from {{ ref('dedup_exchange_rate_stg') }} -{% endif %} -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql deleted file mode 100644 index 0fe0693e9d1d4..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('multiple_column_names_conflicts_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('multiple_column_names_conflicts_stg') }} --- multiple_column_names_conflicts from {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) 
}} -{% else %} -select * from {{ ref('multiple_column_names_conflicts_stg') }} -{% endif %} -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql deleted file mode 100644 index 49df39d8e0ff9..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('pos_dedup_cdcx_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('pos_dedup_cdcx_stg') }} --- pos_dedup_cdcx from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} -{% else %} -select * from {{ ref('pos_dedup_cdcx_stg') }} -{% endif %} -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql deleted file mode 100644 
index 4e251b38b7383..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('renamed_dedup_cdc_excluded_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('renamed_dedup_cdc_excluded_stg') }} --- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} -{% else %} -select * from {{ ref('renamed_dedup_cdc_excluded_stg') }} -{% endif %} -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 90081f1578d64..a7c9045ed5330 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -3,64 +3,53 @@ unique_key = "_airbyte_unique_key_scd", schema = 
"test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_exchange_rate' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? 
--- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from {{ ref('dedup_exchange_rate_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('dedup_exchange_rate_scd_new_data') }})","delete from _airbyte_test_normalization.dedup_exchange_rate_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_exchange_rate_stg)"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) and {{ final_table_relation }}._airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.dedup_exchange_rate_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_exchange_rate_stg)"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('dedup_exchange_rate_scd_new_data') }} +-- depends on: ref('dedup_exchange_rate_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct @@ -69,11 +58,11 @@ new_data_ids as ( 'currency', 'nzd', ]) }} as _airbyte_unique_key - from {{ 
ref('dedup_exchange_rate_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ ref('dedup_exchange_rate_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -87,7 +76,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from {{ ref('dedup_exchange_rate_scd_new_data') }} + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data union all select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index e48a79fce15fc..6432771b55812 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -3,73 +3,64 @@ unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='renamed_dedup_cdc_excluded' - ) - %} - 
{# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? 
--- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - ]) }} as _airbyte_unique_key - from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='renamed_dedup_cdc_excluded' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","delete from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }})","delete from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg)"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + ) and {{ final_table_relation }}._airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg)"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} +-- depends on: ref('renamed_dedup_cdc_excluded_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('renamed_dedup_cdc_excluded_stg') }} + -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ 
adapter.quote('id'), ]) }} as _airbyte_unique_key - from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -83,7 +74,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from {{ ref('renamed_dedup_cdc_excluded_scd_new_data') }} + select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from new_data union all select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index b8dc781f16dc7..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('dedup_exchange_rate_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ 
ref('dedup_exchange_rate_stg') }} --- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} -{% else %} -select * from {{ ref('dedup_exchange_rate_stg') }} -{% endif %} -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql deleted file mode 100644 index 4e251b38b7383..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('renamed_dedup_cdc_excluded_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('renamed_dedup_cdc_excluded_stg') }} --- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} -{% else %} -select * from {{ ref('renamed_dedup_cdc_excluded_stg') }} -{% endif %} -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql deleted file mode 100644 index b246701c2e123..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_scd_new_data.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."1_prefix_startwith_number_scd_new_data" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "1_prefix_startwith_number_scd_new_data__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."1_prefix_startwith_number_scd_new_data" ("_airbyte_1_prefix_startwith_number_hashid", "id", "date", "text", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_1_prefix_startwith_number_hashid", "id", "date", "text", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "1_prefix_startwith_number_scd_new_data__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql deleted file mode 100644 index 7006d949e5f5b..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_scd_new_data.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."dedup_cdc_excluded_scd_new_data" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "dedup_cdc_excluded_scd_new_data__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."dedup_cdc_excluded_scd_new_data" ("_airbyte_dedup_cdc_excluded_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_dedup_cdc_excluded_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "dedup_cdc_excluded_scd_new_data__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index 18d62497dea4a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "dedup_exchange_rate_scd_new_data__dbt_tmp" - ); - - - insert into 
"postgres"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data" ("_airbyte_dedup_exchange_rate_hashid", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_dedup_exchange_rate_hashid", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "dedup_exchange_rate_scd_new_data__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql deleted file mode 100644 index 914878633d4d8..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_scd_new_data.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_scd_new_data" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "multiple_column_names_conflicts_scd_new_da__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_scd_new_data" ("_airbyte_multiple_co__ames_conflicts_hashid", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_multiple_co__ames_conflicts_hashid", "id", "User Id", "user_id", "User id", 
"user id", "User@Id", "userid", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "multiple_column_names_conflicts_scd_new_da__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql deleted file mode 100644 index da0f97d9aab3a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_scd_new_data.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."pos_dedup_cdcx_scd_new_data" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "pos_dedup_cdcx_scd_new_data__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."pos_dedup_cdcx_scd_new_data" ("_airbyte_pos_dedup_cdcx_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_ab_cdc_log_pos", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_pos_dedup_cdcx_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_ab_cdc_log_pos", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "pos_dedup_cdcx_scd_new_data__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql deleted file mode 100644 index facf4989f9084..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_scd_new_data" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "renamed_dedup_cdc_excluded_scd_new_data__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_scd_new_data" ("_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") - ( - select "_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" - from "renamed_dedup_cdc_excluded_scd_new_data__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index c84aad95317f1..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from 
"postgres"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "dedup_exchange_rate_scd_new_data__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data" ("_airbyte_dedup_exchange_rate_hashid", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "new_column", "id", "usd") - ( - select "_airbyte_dedup_exchange_rate_hashid", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "new_column", "id", "usd" - from "dedup_exchange_rate_scd_new_data__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql deleted file mode 100644 index 5f63dc0467c5d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_scd_new_data.sql +++ /dev/null @@ -1,15 +0,0 @@ - - - delete from "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_scd_new_data" - where (_airbyte_ab_id) in ( - select (_airbyte_ab_id) - from "renamed_dedup_cdc_excluded_scd_new_data__dbt_tmp" - ); - - - insert into "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_scd_new_data" ("_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", 
"_airbyte_normalized_at", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at") - ( - select "_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at" - from "renamed_dedup_cdc_excluded_scd_new_data__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/dbt_project.yml index 44d3103d221e9..12f7ddeae9704 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/dbt_project.yml @@ -54,7 +54,6 @@ vars: nested_stream_with_complex_columns_resulting_into_long_names_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names nested_stream_with_complex_columns_resulting_into_long_names_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names nested_stream_with_complex_columns_resulting_into_long_names_stg: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names nested_stream_with_complex_columns_resulting_into_long_names_scd: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names nested_stream_with_complex_columns_resulting_into_long_names: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names 
non_nested_stream_without_namespace_resulting_into_long_names_ab1: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names @@ -64,7 +63,6 @@ vars: some_stream_that_was_empty_ab1: test_normalization._airbyte_raw_some_stream_that_was_empty some_stream_that_was_empty_ab2: test_normalization._airbyte_raw_some_stream_that_was_empty some_stream_that_was_empty_stg: test_normalization._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty_scd_new_data: test_normalization._airbyte_raw_some_stream_that_was_empty some_stream_that_was_empty_scd: test_normalization._airbyte_raw_some_stream_that_was_empty some_stream_that_was_empty: test_normalization._airbyte_raw_some_stream_that_was_empty simple_stream_with_namespace_resulting_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index 2a961faaec733..cb059ed9a244a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -9,7 +9,7 @@ as ( --- depends on: 
"integrationtests"._airbyte_test_normalization."nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data" +-- depends on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql deleted file mode 100644 index 882316f6da066..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql +++ /dev/null @@ -1,10 +0,0 @@ - - - create view "integrationtests"._airbyte_test_normalization."nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data__dbt_tmp" as ( - --- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') - -select * from "integrationtests"._airbyte_test_normalization."nested_stream_with_complex_columns_resulting_into_long_names_stg" - - - ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index 
533f2cf15a0d8..0a988fc89f480 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -3,73 +3,64 @@ unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='nested_stream_with_complex_columns_resulting_into_long_names' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). 
- -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? --- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key - from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='nested_stream_with_complex_columns_resulting_into_long_names' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }}","drop view _airbyte_test_normalization.nested_stream_with_complex_columns_resulting_into_long_names_stg"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + ) and {{ final_table_relation }}._airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.nested_stream_with_complex_columns_resulting_into_long_names_stg"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} +-- depends on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} + -- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct {{ dbt_utils.surrogate_key([ 'id', ]) }} as _airbyte_unique_key - from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -83,7 +74,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), 
input_data as ( - select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data') }} + select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from new_data union all select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql deleted file mode 100644 index 8d6939ff2157e..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} --- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} -{% else %} 
-select * from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} -{% endif %} -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql deleted file mode 100644 index 882316f6da066..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_views/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data.sql +++ /dev/null @@ -1,10 +0,0 @@ - - - create view "integrationtests"._airbyte_test_normalization."nested_stream_with_complex_columns_resulting_into_long_names_scd_new_data__dbt_tmp" as ( - --- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') - -select * from "integrationtests"._airbyte_test_normalization."nested_stream_with_complex_columns_resulting_into_long_names_stg" - - - ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml index 6df743bc8c9f0..cfd8723edf5f4 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml @@ -58,12 +58,10 @@ vars: dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd_new_data: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_dbt_project.yml index ef94da4100413..5b2760dc9d0f2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_dbt_project.yml @@ -58,36 +58,30 @@ vars: dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate 
dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd_new_data: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_scd_new_data: test_normalization._airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_scd_new_data: test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx 1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number 1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number 1_prefix_startwith_number_stg: 
test_normalization._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_scd_new_data: test_normalization._airbyte_raw_1_prefix_startwith_number 1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number 1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_scd_new_data: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index d305fad65aa01..3a9b19b40246b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -9,7 +9,7 @@ as ( --- depends on: "integrationtests"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data" +-- depends on: ref('dedup_exchange_rate_stg') with 
input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index 200a7b1d3e0a6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,10 +0,0 @@ - - - create view "integrationtests"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data__dbt_tmp" as ( - --- depends_on: ref('dedup_exchange_rate_stg') - -select * from "integrationtests"._airbyte_test_normalization."dedup_exchange_rate_stg" - - - ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index d08d58bdbfd7d..1fb7901421091 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -3,64 +3,53 @@ unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", post_hook = [" - 
{% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_exchange_rate' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? 
--- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view {{ ref('dedup_exchange_rate_scd_new_data') }}","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) and {{ final_table_relation }}._airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('dedup_exchange_rate_scd_new_data') }} +-- depends on: ref('dedup_exchange_rate_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct @@ -69,11 +58,11 @@ new_data_ids as ( 'currency', 'nzd', ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the 
table's column types - select * from {{ ref('dedup_exchange_rate_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -87,7 +76,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from {{ ref('dedup_exchange_rate_scd_new_data') }} + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data union all select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index 02049154e8298..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('dedup_exchange_rate_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('dedup_exchange_rate_stg') }} --- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} -{% else %} -select * from {{ ref('dedup_exchange_rate_stg') }} -{% endif %} -{{ 
incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 0064da124325f..a89d2f8e880c6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -3,64 +3,53 @@ unique_key = "_airbyte_unique_key_scd", schema = "test_normalization", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='dedup_exchange_rate' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. 
Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select modified_ids._airbyte_unique_key - from - ( - select nullif(scd_table._airbyte_unique_key, '') as _airbyte_unique_key from {{ this }} scd_table --- TODO is this even necessary? --- inner join modified_ids on scd_table._airbyte_unique_key = modified_ids._airbyte_unique_key - where _airbyte_active_row = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', - ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} - where 1=1 - {{ incremental_clause('_airbyte_emitted_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - ) modified_ids - on modified_ids._airbyte_unique_key = scd_active_rows._airbyte_unique_key - group by modified_ids._airbyte_unique_key - having count(scd_active_rows._airbyte_unique_key) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view {{ ref('dedup_exchange_rate_scd_new_data') }}","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select _airbyte_unique_key + from {{ this }} + where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) and {{ final_table_relation }}._airbyte_unique_key not in ( + select _airbyte_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('dedup_exchange_rate_scd_new_data') }} +-- depends on: ref('dedup_exchange_rate_stg') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), new_data_ids as ( -- build a subset of _airbyte_unique_key from rows that are new select distinct @@ -69,11 +58,11 @@ new_data_ids as ( 'currency', 'nzd', ]) }} as _airbyte_unique_key - from {{ ref('dedup_exchange_rate_scd_new_data') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ ref('dedup_exchange_rate_scd_new_data') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -87,7 +76,7 @@ previous_active_scd_data as ( where _airbyte_active_row = 1 ), input_data as ( - select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from {{ ref('dedup_exchange_rate_scd_new_data') }} + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data union all select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index 02049154e8298..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('dedup_exchange_rate_stg') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('dedup_exchange_rate_stg') }} --- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at', this) }} -{% else %} -select * from {{ ref('dedup_exchange_rate_stg') }} -{% endif %} -{{ incremental_clause('_airbyte_emitted_at', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index 200a7b1d3e0a6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,10 +0,0 @@ - - - create view "integrationtests"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data__dbt_tmp" as ( - --- 
depends_on: ref('dedup_exchange_rate_stg') - -select * from "integrationtests"._airbyte_test_normalization."dedup_exchange_rate_stg" - - - ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql deleted file mode 100644 index 200a7b1d3e0a6..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_scd_new_data.sql +++ /dev/null @@ -1,10 +0,0 @@ - - - create view "integrationtests"._airbyte_test_normalization."dedup_exchange_rate_scd_new_data__dbt_tmp" as ( - --- depends_on: ref('dedup_exchange_rate_stg') - -select * from "integrationtests"._airbyte_test_normalization."dedup_exchange_rate_stg" - - - ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/dbt_project.yml index 9797dbfc27804..571d5bae581bb 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/dbt_project.yml @@ -53,7 +53,6 @@ vars: NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB2: 
TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES - NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES @@ -63,7 +62,6 @@ vars: SOME_STREAM_THAT_WAS_EMPTY_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY SOME_STREAM_THAT_WAS_EMPTY_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY SOME_STREAM_THAT_WAS_EMPTY_STG: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY - SOME_STREAM_THAT_WAS_EMPTY_SCD_NEW_DATA: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY SOME_STREAM_THAT_WAS_EMPTY_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY SOME_STREAM_THAT_WAS_EMPTY: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES_AB1: TEST_NORMALIZATION_NAMESPACE._AIRBYTE_RAW_SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql index 7a1e1c737e941..e834f52085a30 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql @@ -3,7 +3,7 @@ create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" as (select * from( --- depends on: "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA" +-- depends on: ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql deleted file mode 100644 index 9eee6ae93ca9d..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql +++ /dev/null @@ -1,11 +0,0 @@ - - create or replace view "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA" - - as ( - --- depends_on: ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') - -select * from "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG" - - - ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql index 1b7251a40785d..f7a69b5e7766f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql @@ -3,73 +3,64 @@ unique_key = "_AIRBYTE_UNIQUE_KEY_SCD", schema = "TEST_NORMALIZATION", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - 
identifier='NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_AIRBYTE_UNIQUE_KEY' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. - delete from {{ final_table_relation }} where {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY in ( - select modified_ids._AIRBYTE_UNIQUE_KEY - from - ( - select nullif(scd_table._AIRBYTE_UNIQUE_KEY, '') as _AIRBYTE_UNIQUE_KEY from {{ this }} scd_table --- TODO is this even necessary? 
--- inner join modified_ids on scd_table._AIRBYTE_UNIQUE_KEY = modified_ids._AIRBYTE_UNIQUE_KEY - where _AIRBYTE_ACTIVE_ROW = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - 'ID', - ]) }} as _AIRBYTE_UNIQUE_KEY - from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA') }} - where 1=1 - {{ incremental_clause('_AIRBYTE_EMITTED_AT', this.schema + '.' + adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} - ) modified_ids - on modified_ids._AIRBYTE_UNIQUE_KEY = scd_active_rows._AIRBYTE_UNIQUE_KEY - group by modified_ids._AIRBYTE_UNIQUE_KEY - having count(scd_active_rows._AIRBYTE_UNIQUE_KEY) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA') }}","drop view _AIRBYTE_TEST_NORMALIZATION.NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_AIRBYTE_UNIQUE_KEY' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} where {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY in ( + select _AIRBYTE_UNIQUE_KEY + from {{ this }} + where 1 = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} + ) and {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY not in ( + select _AIRBYTE_UNIQUE_KEY + from {{ this }} + where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' 
+ adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _AIRBYTE_TEST_NORMALIZATION.NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA') }} +-- depends on: ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') }} + -- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES') }} + where 1 = 1 + {{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} +), new_data_ids as ( -- build a subset of _AIRBYTE_UNIQUE_KEY from rows that are new select distinct {{ dbt_utils.surrogate_key([ 'ID', ]) }} as _AIRBYTE_UNIQUE_KEY - from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the table's column types - select * from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -83,7 +74,7 @@ previous_active_scd_data as ( where _AIRBYTE_ACTIVE_ROW = 1 ), input_data as ( - select {{ dbt_utils.star(ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG')) }} from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA') }} + select {{ 
dbt_utils.star(ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG')) }} from new_data union all select {{ dbt_utils.star(ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql deleted file mode 100644 index 27968b241cb00..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - cluster_by = ["_AIRBYTE_EMITTED_AT"], - unique_key = '_AIRBYTE_AB_ID', - schema = "_AIRBYTE_TEST_NORMALIZATION", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') }} --- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES') }} -where 1 = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} -{% else %} -select * from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') }} -{% endif %} -{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} - diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql deleted file mode 100644 index 9eee6ae93ca9d..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_views/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA.sql +++ /dev/null @@ -1,11 +0,0 @@ - - create or replace view "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD_NEW_DATA" - - as ( - --- depends_on: ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') - -select * from "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG" - - - ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/dbt_project.yml index 083fcda300c3b..76a02e4351f0e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/dbt_project.yml @@ -57,36 +57,30 @@ vars: DEDUP_EXCHANGE_RATE_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE DEDUP_EXCHANGE_RATE_AB2: 
TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE DEDUP_EXCHANGE_RATE_STG: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE - DEDUP_EXCHANGE_RATE_SCD_NEW_DATA: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE DEDUP_EXCHANGE_RATE_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE DEDUP_EXCHANGE_RATE: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE RENAMED_DEDUP_CDC_EXCLUDED_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED RENAMED_DEDUP_CDC_EXCLUDED_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED RENAMED_DEDUP_CDC_EXCLUDED_STG: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED - RENAMED_DEDUP_CDC_EXCLUDED_SCD_NEW_DATA: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED RENAMED_DEDUP_CDC_EXCLUDED_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED RENAMED_DEDUP_CDC_EXCLUDED: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED DEDUP_CDC_EXCLUDED_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED DEDUP_CDC_EXCLUDED_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED DEDUP_CDC_EXCLUDED_STG: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED - DEDUP_CDC_EXCLUDED_SCD_NEW_DATA: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED DEDUP_CDC_EXCLUDED_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED DEDUP_CDC_EXCLUDED: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED POS_DEDUP_CDCX_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX POS_DEDUP_CDCX_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX POS_DEDUP_CDCX_STG: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX - POS_DEDUP_CDCX_SCD_NEW_DATA: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX POS_DEDUP_CDCX_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX POS_DEDUP_CDCX: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX 1_prefix_startwith_number_ab1: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER 1_prefix_startwith_number_ab2: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER 1_prefix_startwith_number_stg: 
TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER - 1_prefix_startwith_number_scd_new_data: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER 1_prefix_startwith_number_scd: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER 1_prefix_startwith_number: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER MULTIPLE_COLUMN_NAMES_CONFLICTS_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS MULTIPLE_COLUMN_NAMES_CONFLICTS_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS MULTIPLE_COLUMN_NAMES_CONFLICTS_STG: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS - MULTIPLE_COLUMN_NAMES_CONFLICTS_SCD_NEW_DATA: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS MULTIPLE_COLUMN_NAMES_CONFLICTS_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS MULTIPLE_COLUMN_NAMES_CONFLICTS: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql index fbce7f410f858..c7662fbaeef79 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql @@ -3,7 +3,7 @@ create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD" as (select * from( --- depends on: 
"INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD_NEW_DATA" +-- depends on: ref('DEDUP_EXCHANGE_RATE_STG') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql deleted file mode 100644 index 83e400e3078ad..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql +++ /dev/null @@ -1,11 +0,0 @@ - - create or replace view "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD_NEW_DATA" - - as ( - --- depends_on: ref('DEDUP_EXCHANGE_RATE_STG') - -select * from "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_STG" - - - ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql index 536f07528ce1b..5b23c8050a370 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql @@ -3,64 +3,53 @@ unique_key = "_AIRBYTE_UNIQUE_KEY_SCD", schema = "TEST_NORMALIZATION", post_hook = [" - {% - set final_table_relation = adapter.get_relation( - database=this.database, - schema=this.schema, - identifier='DEDUP_EXCHANGE_RATE' - ) - %} - {# - If the final table doesn't exist, then obviously we can't delete anything from it. - Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) - So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) - #} - {% - if final_table_relation is not none and '_AIRBYTE_UNIQUE_KEY' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') - %} - -- Delete records which are no longer active: - -- 1. Find the records which are being updated by querying the _scd_new_data model - -- 2. Then join that against the SCD model to find the records which have no row with _airbyte_active_row = 1 - -- We can't just delete all the modified_ids from final_table because those records might still be active, but not included - -- in the most recent increment (i.e. the final table model would not re-insert them, so the data would be incorrectly lost). - -- In fact, there's no guarantee that the active record is included in the previous_active_scd_data CTE either, - -- so we _must_ join against the entire SCD table to find the active row for each record. - -- We're using a subquery because not all destinations support CTEs in DELETE statements (c.f. Snowflake). - -- Similarly, the subquery doesn't use CTEs because Clickhouse doesn't support CTEs inside delete conditions. 
- delete from {{ final_table_relation }} where {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY in ( - select modified_ids._AIRBYTE_UNIQUE_KEY - from - ( - select nullif(scd_table._AIRBYTE_UNIQUE_KEY, '') as _AIRBYTE_UNIQUE_KEY from {{ this }} scd_table --- TODO is this even necessary? --- inner join modified_ids on scd_table._AIRBYTE_UNIQUE_KEY = modified_ids._AIRBYTE_UNIQUE_KEY - where _AIRBYTE_ACTIVE_ROW = 1 - ) scd_active_rows - right outer join ( - select - {{ dbt_utils.surrogate_key([ - 'ID', - 'CURRENCY', - 'NZD', - ]) }} as _AIRBYTE_UNIQUE_KEY - from {{ ref('DEDUP_EXCHANGE_RATE_SCD_NEW_DATA') }} - where 1=1 - {{ incremental_clause('_AIRBYTE_EMITTED_AT', this.schema + '.' + adapter.quote('DEDUP_EXCHANGE_RATE')) }} - ) modified_ids - on modified_ids._AIRBYTE_UNIQUE_KEY = scd_active_rows._AIRBYTE_UNIQUE_KEY - group by modified_ids._AIRBYTE_UNIQUE_KEY - having count(scd_active_rows._AIRBYTE_UNIQUE_KEY) = 0 + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='DEDUP_EXCHANGE_RATE' ) - {% else %} - -- We have to have a non-empty query, so just do a noop delete - delete from {{ this }} where 1=0 - {% endif %} - ","drop view {{ ref('DEDUP_EXCHANGE_RATE_SCD_NEW_DATA') }}","drop view _AIRBYTE_TEST_NORMALIZATION.DEDUP_EXCHANGE_RATE_STG"], + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_AIRBYTE_UNIQUE_KEY' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- The first subquery finds the most recent increment to the SCD table + -- The second subquery finds, within that increment, the records which are still active + -- We want to delete rows which are in that increment, but are not active + delete from {{ final_table_relation }} where {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY in ( + select _AIRBYTE_UNIQUE_KEY + from {{ this }} + where 1 = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('DEDUP_EXCHANGE_RATE')) }} + ) and {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY not in ( + select _AIRBYTE_UNIQUE_KEY + from {{ this }} + where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('DEDUP_EXCHANGE_RATE')) }} + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _AIRBYTE_TEST_NORMALIZATION.DEDUP_EXCHANGE_RATE_STG"], tags = [ "top-level" ] ) }} --- depends on: {{ ref('DEDUP_EXCHANGE_RATE_SCD_NEW_DATA') }} +-- depends on: ref('DEDUP_EXCHANGE_RATE_STG') with {% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('DEDUP_EXCHANGE_RATE_STG') }} + -- DEDUP_EXCHANGE_RATE from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_DEDUP_EXCHANGE_RATE') }} + where 1 = 1 + {{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} +), new_data_ids as ( -- build a subset of _AIRBYTE_UNIQUE_KEY from rows that are new select distinct @@ -69,11 +58,11 @@ new_data_ids as ( 'CURRENCY', 'NZD', ]) }} as _AIRBYTE_UNIQUE_KEY - from {{ ref('DEDUP_EXCHANGE_RATE_SCD_NEW_DATA') }} + from new_data ), empty_new_data as ( -- build an empty table to only keep the 
table's column types - select * from {{ ref('DEDUP_EXCHANGE_RATE_SCD_NEW_DATA') }} where 1 = 0 + select * from new_data where 1 = 0 ), previous_active_scd_data as ( -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes @@ -87,7 +76,7 @@ previous_active_scd_data as ( where _AIRBYTE_ACTIVE_ROW = 1 ), input_data as ( - select {{ dbt_utils.star(ref('DEDUP_EXCHANGE_RATE_STG')) }} from {{ ref('DEDUP_EXCHANGE_RATE_SCD_NEW_DATA') }} + select {{ dbt_utils.star(ref('DEDUP_EXCHANGE_RATE_STG')) }} from new_data union all select {{ dbt_utils.star(ref('DEDUP_EXCHANGE_RATE_STG')) }} from previous_active_scd_data ), diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql deleted file mode 100644 index a327f8001be1a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql +++ /dev/null @@ -1,20 +0,0 @@ -{{ config( - cluster_by = ["_AIRBYTE_EMITTED_AT"], - unique_key = '_AIRBYTE_AB_ID', - schema = "_AIRBYTE_TEST_NORMALIZATION", - tags = [ "top-level-intermediate" ] -) }} --- depends_on: ref('DEDUP_EXCHANGE_RATE_STG') -{% if is_incremental() %} --- retrieve incremental "new" data -select - * -from {{ ref('DEDUP_EXCHANGE_RATE_STG') }} --- DEDUP_EXCHANGE_RATE from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_DEDUP_EXCHANGE_RATE') }} -where 1 = 1 -{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} -{% else %} -select * from {{ ref('DEDUP_EXCHANGE_RATE_STG') }} -{% endif %} -{{ 
incremental_clause('_AIRBYTE_EMITTED_AT', this) }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql deleted file mode 100644 index 83e400e3078ad..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD_NEW_DATA.sql +++ /dev/null @@ -1,11 +0,0 @@ - - create or replace view "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD_NEW_DATA" - - as ( - --- depends_on: ref('DEDUP_EXCHANGE_RATE_STG') - -select * from "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_STG" - - - ); From 20438e1b679814a9d4728f1a6260336036932645 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Sun, 12 Jun 2022 19:34:25 -0700 Subject: [PATCH 36/43] add test case case for cross-sync deletion --- .../data_input/catalog_schema_change.json | 32 +++++++++++++++++++ .../data_input/messages_schema_change.txt | 2 ++ .../simple_streams_third_run_row_counts.sql | 6 ++-- 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog_schema_change.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog_schema_change.json index ac8cea0232146..a54e89c4ff2ee 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog_schema_change.json +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog_schema_change.json @@ -119,6 +119,38 @@ "cursor_field": [], "destination_sync_mode": "append_dedup", "primary_key": [["id"]] + }, + { + "stream": { + "name": "dedup_cdc_excluded", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": "integer" + }, + "name": { + "type": ["string", "null"] + }, + "_ab_cdc_lsn": { + "type": ["null", "number"] + }, + "_ab_cdc_updated_at": { + "type": ["null", "number"] + }, + "_ab_cdc_deleted_at": { + "type": ["null", "number"] + } + } + }, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": ["_ab_cdc_lsn"], + "destination_sync_mode": "append_dedup", + "primary_key": [["id"]] } ] } diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_schema_change.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_schema_change.txt index ebe17b33d6e71..c29a171a7f683 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_schema_change.txt +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_schema_change.txt @@ -11,3 +11,5 @@ {"type":"RECORD","record":{"stream":"renamed_dedup_cdc_excluded","data":{"id":8,"name":"vw","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623949314663,"_ab_cdc_lsn":26985264,"_ab_cdc_deleted_at":null},"emitted_at":1623960160}} {"type":"RECORD","record":{"stream":"renamed_dedup_cdc_excluded","data":{"id":9,"name":"opel","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623950868109,"_ab_cdc_lsn":28009440,"_ab_cdc_deleted_at":null},"emitted_at":1623961660}} 
{"type":"RECORD","record":{"stream":"renamed_dedup_cdc_excluded","data":{"id":9,"name":null,"column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623950868371,"_ab_cdc_lsn":28010232,"_ab_cdc_deleted_at":1623950868371},"emitted_at":1623961660}} + +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":8,"name":"ford","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1625000000000,"_ab_cdc_lsn":29020252,"_ab_cdc_deleted_at":1625000000000},"emitted_at":1625000000000}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/simple_streams_third_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/simple_streams_third_run_row_counts.sql index dadeb6026f62e..bbf2fd047b446 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/simple_streams_third_run_row_counts.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/simple_streams_third_run_row_counts.sql @@ -18,13 +18,13 @@ union all union all - select distinct '_airbyte_raw_dedup_cdc_excluded' as label, count(*) as row_count, 3 as expected_count + select distinct '_airbyte_raw_dedup_cdc_excluded' as label, count(*) as row_count, 4 as expected_count from test_normalization._airbyte_raw_dedup_cdc_excluded union all - select distinct 'dedup_cdc_excluded_scd' as label, count(*) as row_count, 10 as expected_count + select distinct 'dedup_cdc_excluded_scd' as label, count(*) as row_count, 11 as expected_count from test_normalization.dedup_cdc_excluded_scd union all - select distinct 'dedup_cdc_excluded' as label, count(*) as row_count, 4 as expected_count + select distinct 'dedup_cdc_excluded' as 
label, count(*) as row_count, 3 as expected_count from test_normalization.dedup_cdc_excluded ) select * From dfee8e082b191580d402fe2d90bd94ec1a2a9203 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Sun, 12 Jun 2022 19:38:53 -0700 Subject: [PATCH 37/43] faster query --- .../transform_catalog/stream_processor.py | 32 +++++++++++++------ 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index ed88c0e2194b0..2ba799fe82f38 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -849,7 +849,7 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup jinja_variables["scd_columns_sql"] = scd_columns_sql sql = Template( """ --- depends on: {{ from_table }} +-- depends_on: {{ from_table }} with {{ '{% if is_incremental() %}' }} new_data as ( @@ -1120,11 +1120,15 @@ def add_to_outputs( final_table_name = self.tables_registry.get_file_name(schema, self.json_path, self.stream_name, "", truncate_name) active_row_column_name = self.name_transformer.normalize_column_name("_airbyte_active_row") + clickhouse_nullable_join_setting = "" if self.destination_type == DestinationType.CLICKHOUSE: # Clickhouse has special delete syntax delete_statement = "alter table {{ final_table_relation }} delete" unique_key_reference = self.get_unique_key(in_jinja=False) noop_delete_statement = "alter table {{ this }} delete where 1=0" + # Without this, our LEFT JOIN would return empty string for non-matching rows, so our COUNT would include those rows. 
+ # We want to exclude them (this is the default behavior in other DBs) so we have to set join_use_nulls=1 + clickhouse_nullable_join_setting = "SETTINGS join_use_nulls=1" elif self.destination_type == DestinationType.BIGQUERY: # Bigquery doesn't like the "delete from project.schema.table where project.schema.table.column in" syntax; # it requires "delete from project.schema.table table_alias where table_alias.column in" @@ -1154,17 +1158,24 @@ def add_to_outputs( {{ '%}' }} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) {{ delete_statement }} where {{ unique_key_reference }} in ( - select {{ unique_key }} - from {{ '{{ this }}' }} - where 1 = 1 {{ normalized_at_incremental_clause }} - ) and {{ unique_key_reference }} not in ( - select {{ unique_key }} + select distinct {{ unique_key }} from {{ '{{ this }}' }} - where {{ active_row_column_name }} = 1 {{ normalized_at_incremental_clause }} + left join ( + select {{ unique_key }} as active_unique_key + from {{ '{{ this }}' }} + where {{ active_row_column_name }} = 1 {{ normalized_at_incremental_clause }} + ) active_recent_scd_rows on {{ unique_key }} = active_unique_key + where 1=1 {{ normalized_at_incremental_clause }} + group by {{ unique_key }} + having count(active_unique_key) = 0 + {{ clickhouse_nullable_join_setting }} ) {{ '{% else %}' }} -- We have to have a non-empty query, so just do a noop delete @@ -1183,6 +1194,7 @@ def add_to_outputs( self.get_normalized_at(in_jinja=True), ), 
unique_key_reference=unique_key_reference, + clickhouse_nullable_join_setting=clickhouse_nullable_join_setting, ) hooks.append(deletion_hook) From 90b2f86494c207f66eada781e2a2f8cd6914b699 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 13 Jun 2022 09:47:23 -0700 Subject: [PATCH 38/43] regenerate outputs --- ..._columns_resulting_into_long_names_scd.sql | 2 +- ..._columns_resulting_into_long_names_scd.sql | 26 +-- .../test_simple_streams/dbt_project.yml | 5 + .../dedup_exchange_rate_scd.sql | 2 +- .../dedup_exchange_rate_scd.sql | 26 +-- .../dedup_exchange_rate_scd.sql | 26 +-- .../modified_models/generated/sources.yml | 19 +- .../dedup_cdc_excluded_scd.sql | 2 +- .../dedup_exchange_rate_scd.sql | 2 +- .../renamed_dedup_cdc_excluded_scd.sql | 2 +- .../dedup_cdc_excluded_scd.sql | 27 +-- .../dedup_exchange_rate_scd.sql | 27 +-- ..._stream_with_co_1g_into_long_names_scd.sql | 2 +- ..._stream_with_co_1g_into_long_names_scd.sql | 26 +-- ..._stream_with_co_1g_into_long_names_scd.sql | 2 +- .../dedup_exchange_rate_scd.sql | 2 +- .../dedup_exchange_rate_scd.sql | 26 +-- .../dedup_exchange_rate_scd.sql | 2 +- .../dedup_exchange_rate_scd.sql | 2 +- .../dedup_exchange_rate_scd.sql | 26 +-- .../dedup_exchange_rate_scd.sql | 2 +- ...ream_with_c__lting_into_long_names_scd.sql | 2 +- .../some_stream_that_was_empty_scd.sql | 2 +- ...ream_with_c__lting_into_long_names_scd.sql | 26 +-- .../some_stream_that_was_empty_scd.sql | 26 +-- .../test_simple_streams/dbt_project.yml | 5 + .../1_prefix_startwith_number_scd.sql | 2 +- .../dedup_cdc_excluded_scd.sql | 2 +- .../dedup_exchange_rate_scd.sql | 2 +- .../multiple_column_names_conflicts_scd.sql | 2 +- .../test_normalization/pos_dedup_cdcx_scd.sql | 2 +- .../renamed_dedup_cdc_excluded_scd.sql | 2 +- .../1_prefix_startwith_number_scd.sql | 26 +-- .../dedup_cdc_excluded_scd.sql | 26 +-- .../dedup_exchange_rate_scd.sql | 26 +-- .../multiple_column_names_conflicts_scd.sql | 26 +-- .../test_normalization/pos_dedup_cdcx_scd.sql | 26 +-- 
.../renamed_dedup_cdc_excluded_scd.sql | 26 +-- .../dedup_cdc_excluded_ab1.sql | 22 +++ .../dedup_cdc_excluded_ab2.sql | 22 +++ .../dedup_cdc_excluded_scd.sql | 162 ++++++++++++++++++ .../dedup_exchange_rate_scd.sql | 26 +-- .../renamed_dedup_cdc_excluded_scd.sql | 26 +-- .../test_normalization/dedup_cdc_excluded.sql | 25 +++ .../dedup_cdc_excluded_stg.sql | 22 +++ .../modified_models/generated/sources.yml | 19 +- .../dedup_cdc_excluded_scd.sql | 15 ++ .../test_normalization/dedup_cdc_excluded.sql | 15 ++ .../dedup_cdc_excluded_stg.sql | 15 ++ ..._columns_resulting_into_long_names_scd.sql | 2 +- ..._columns_resulting_into_long_names_scd.sql | 26 +-- .../test_simple_streams/dbt_project.yml | 5 + .../dedup_exchange_rate_scd.sql | 2 +- .../dedup_exchange_rate_scd.sql | 26 +-- .../dedup_exchange_rate_scd.sql | 26 +-- .../modified_models/generated/sources.yml | 19 +- ..._COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql | 2 +- ..._COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql | 26 +-- .../DEDUP_EXCHANGE_RATE_SCD.sql | 2 +- .../DEDUP_EXCHANGE_RATE_SCD.sql | 26 +-- 60 files changed, 736 insertions(+), 280 deletions(-) create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index 45aa53937e094..59cf6d3a78044 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -9,7 +9,7 @@ OPTIONS() as ( --- depends on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') +-- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index 7ba778639fef4..6a3d01894d614 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -20,17 +20,23 @@ if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query 
is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} - ) and final_table._airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -39,7 +45,7 @@ ","drop view _airbyte_test_normalization.nested_stream_with_complex_columns_resulting_into_long_names_stg"], tags = [ "top-level" ] ) }} --- depends on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') +-- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/dbt_project.yml index daf19b9c9377c..77cd510537471 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/dbt_project.yml @@ -63,3 +63,8 @@ vars: renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index c8203853d54d2..d7fd59df15b5d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -9,7 +9,7 @@ OPTIONS() as ( --- depends on: ref('dedup_exchange_rate_stg') +-- depends_on: ref('dedup_exchange_rate_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 423aad4f8dff0..bdd2e78031e15 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -20,17 +20,23 @@ if final_table_relation is not none and '_airbyte_unique_key' in 
adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) and final_table._airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -39,7 +45,7 @@ ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends on: ref('dedup_exchange_rate_stg') +-- depends_on: ref('dedup_exchange_rate_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index c44384f3cda1d..851df325ae711 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -20,17 +20,23 @@ if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 
1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) and final_table._airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -39,7 +45,7 @@ ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends on: ref('dedup_exchange_rate_stg') +-- depends_on: ref('dedup_exchange_rate_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/sources.yml index dd538a80131ae..79ad1a1bb5c5a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/sources.yml @@ -1,11 +1,12 @@ version: 2 sources: -- name: test_normalization - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_dedup_exchange_rate - - name: _airbyte_raw_exchange_rate - - name: _airbyte_raw_renamed_dedup_cdc_excluded + - name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_dedup_cdc_excluded + - name: _airbyte_raw_dedup_exchange_rate + - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 5bdb0dc1df913..0c7c151236fd2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -12,7 +12,7 @@ as ( --- depends on: ref('dedup_cdc_excluded_stg') +-- depends_on: ref('dedup_cdc_excluded_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index bd961b05351de..c1e8e6cb63fec 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -12,7 +12,7 @@ as ( --- depends on: ref('dedup_exchange_rate_stg') +-- depends_on: ref('dedup_exchange_rate_stg') with input_data as ( diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 682ba8ff69576..eedc913fd45a5 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -12,7 +12,7 @@ as ( --- depends on: ref('renamed_dedup_cdc_excluded_stg') +-- depends_on: ref('renamed_dedup_cdc_excluded_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 4e6f8d2a11928..2bbf52a1b2600 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -18,17 +18,24 @@ if 
final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) alter table {{ final_table_relation }} delete where _airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_cdc_excluded')) }} - ) and _airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_cdc_excluded')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_cdc_excluded')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ quote('dedup_cdc_excluded')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 + SETTINGS join_use_nulls=1 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -37,7 +44,7 @@ ","drop view _airbyte_test_normalization.dedup_cdc_excluded_stg"], tags = [ "top-level" ] ) }} --- depends on: ref('dedup_cdc_excluded_stg') +-- depends_on: ref('dedup_cdc_excluded_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 3490631ea19bf..48a1f27bce797 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -18,17 +18,24 @@ if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and 
unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) alter table {{ final_table_relation }} delete where _airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_exchange_rate')) }} - ) and _airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_exchange_rate')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_exchange_rate')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 + SETTINGS join_use_nulls=1 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -37,7 +44,7 @@ ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends on: ref('dedup_exchange_rate_stg') +-- depends_on: ref('dedup_exchange_rate_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index d36afc59a4fa2..e5f3e4859deba 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -4,7 +4,7 @@ test_normalization.`nested_stream_with_co_1g_into_long_names_scd__dbt_tmp` as ( --- depends on: ref('nested_stream_with_co_1g_into_long_names_stg') +-- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index 1768d23641384..bec1d659bfcb8 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -18,17 +18,23 @@ if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that 
increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} - ) and {{ final_table_relation }}._airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -37,7 +43,7 @@ ","drop view _airbyte_test_normalization.nested_stream_with_co_1g_into_long_names_stg"], tags = [ "top-level" ] ) }} --- depends on: ref('nested_stream_with_co_1g_into_long_names_stg') +-- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index d36afc59a4fa2..e5f3e4859deba 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -4,7 +4,7 @@ test_normalization.`nested_stream_with_co_1g_into_long_names_scd__dbt_tmp` as ( --- depends on: ref('nested_stream_with_co_1g_into_long_names_stg') +-- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index f837bab8d5381..59d722cb4f381 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -4,7 +4,7 @@ test_normalization.`dedup_exchange_rate_scd__dbt_tmp` as ( --- depends on: ref('dedup_exchange_rate_stg') +-- depends_on: ref('dedup_exchange_rate_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 365b40479baa3..00c8ccbaeb48b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -18,17 +18,23 @@ if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to 
the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) and {{ final_table_relation }}._airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -37,7 +43,7 @@ ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends on: ref('dedup_exchange_rate_stg') +-- depends_on: ref('dedup_exchange_rate_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index f837bab8d5381..59d722cb4f381 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -4,7 +4,7 @@ test_normalization.`dedup_exchange_rate_scd__dbt_tmp` as ( --- depends on: ref('dedup_exchange_rate_stg') +-- depends_on: ref('dedup_exchange_rate_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 98527f50e0a17..cfd186b006ae3 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -4,7 +4,7 @@ as --- depends on: ref('dedup_exchange_rate_stg') +-- depends_on: ref('dedup_exchange_rate_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index a1458a7dac3ad..2dab8cabfb4f7 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -18,17 +18,23 @@ if final_table_relation is not none and quote('_AIRBYTE_UNIQUE_KEY') in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table 
where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} where {{ final_table_relation }}.{{ quote('_AIRBYTE_UNIQUE_KEY') }} in ( - select {{ quote('_AIRBYTE_UNIQUE_KEY') }} + select distinct {{ quote('_AIRBYTE_UNIQUE_KEY') }} from {{ this }} - where 1 = 1 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), this.schema + '.' + quote('dedup_exchange_rate')) }} - ) and {{ final_table_relation }}.{{ quote('_AIRBYTE_UNIQUE_KEY') }} not in ( - select {{ quote('_AIRBYTE_UNIQUE_KEY') }} - from {{ this }} - where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), this.schema + '.' + quote('dedup_exchange_rate')) }} + left join ( + select {{ quote('_AIRBYTE_UNIQUE_KEY') }} as active_unique_key + from {{ this }} + where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), this.schema + '.' + quote('dedup_exchange_rate')) }} + ) active_recent_scd_rows on {{ quote('_AIRBYTE_UNIQUE_KEY') }} = active_unique_key + where 1=1 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), this.schema + '.' 
+ quote('dedup_exchange_rate')) }} + group by {{ quote('_AIRBYTE_UNIQUE_KEY') }} + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -37,7 +43,7 @@ ","drop view test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends on: ref('dedup_exchange_rate_stg') +-- depends_on: ref('dedup_exchange_rate_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 98527f50e0a17..cfd186b006ae3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -4,7 +4,7 @@ as --- depends on: ref('dedup_exchange_rate_stg') +-- depends_on: ref('dedup_exchange_rate_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql index fa54425e1d55f..150407b1fbdf5 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."nested_stream_with_c__lting_into_long_names_scd" as ( --- depends on: ref('nested_stream_with_c__lting_into_long_names_stg') +-- depends_on: ref('nested_stream_with_c__lting_into_long_names_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql index e409c2ecd698f..885ba6546326a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."some_stream_that_was_empty_scd" as ( --- depends on: ref('some_stream_that_was_empty_stg') +-- depends_on: ref('some_stream_that_was_empty_stg') with input_data as ( diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql index acaa68ee0beab..75d10fb21e544 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql @@ -19,17 +19,23 @@ if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_c__lting_into_long_names')) }} - ) and {{ final_table_relation }}._airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_c__lting_into_long_names')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_c__lting_into_long_names')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_c__lting_into_long_names')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -38,7 +44,7 @@ ","delete from _airbyte_test_normalization.nested_stream_with_c__lting_into_long_names_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.nested_stream_with_c__lting_into_long_names_stg)"], tags = [ "top-level" ] ) }} --- depends on: ref('nested_stream_with_c__lting_into_long_names_stg') +-- depends_on: ref('nested_stream_with_c__lting_into_long_names_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql index eb339041cba53..5b9715a46cc03 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql @@ -19,17 +19,23 @@ if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('some_stream_that_was_empty')) }} - ) and {{ final_table_relation }}._airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('some_stream_that_was_empty')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('some_stream_that_was_empty')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('some_stream_that_was_empty')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -38,7 +44,7 @@ ","delete from _airbyte_test_normalization.some_stream_that_was_empty_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.some_stream_that_was_empty_stg)"], tags = [ "top-level" ] ) }} --- depends on: ref('some_stream_that_was_empty_stg') +-- depends_on: ref('some_stream_that_was_empty_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml index daf19b9c9377c..77cd510537471 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml @@ -63,3 +63,8 @@ vars: renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: 
test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql index 8f2383599fe25..dac6628377db2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."1_prefix_startwith_number_scd" as ( --- depends on: ref('1_prefix_startwith_number_stg') +-- depends_on: ref('1_prefix_startwith_number_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 310dc1101d1b6..ba66363a77f5a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."dedup_cdc_excluded_scd" as ( --- depends on: ref('dedup_cdc_excluded_stg') +-- depends_on: ref('dedup_cdc_excluded_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index b152c3250288a..c9440958247d2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."dedup_exchange_rate_scd" as ( --- depends on: ref('dedup_exchange_rate_stg') +-- depends_on: ref('dedup_exchange_rate_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql index 0066be3673f26..9eb7e6e349ab2 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."multiple_column_names_conflicts_scd" as ( --- depends on: ref('multiple_column_names_conflicts_stg') +-- depends_on: ref('multiple_column_names_conflicts_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql index 354a81774bcfa..450815d1ccc51 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."pos_dedup_cdcx_scd" as ( --- depends on: ref('pos_dedup_cdcx_stg') +-- depends_on: ref('pos_dedup_cdcx_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 1b7f31846576a..31e25e700b601 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -4,7 +4,7 @@ create table "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" as ( --- depends on: ref('renamed_dedup_cdc_excluded_stg') +-- depends_on: ref('renamed_dedup_cdc_excluded_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql index b5329744ec12e..f8b7865b3cfe8 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql @@ -19,17 +19,23 @@ if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- 
Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('1_prefix_startwith_number')) }} - ) and {{ final_table_relation }}._airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('1_prefix_startwith_number')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('1_prefix_startwith_number')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('1_prefix_startwith_number')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -38,7 +44,7 @@ ","delete from _airbyte_test_normalization.{{ adapter.quote('1_prefix_startwith_number_stg') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.{{ adapter.quote('1_prefix_startwith_number_stg') }})"], tags = [ "top-level" ] ) }} --- depends on: ref('1_prefix_startwith_number_stg') +-- depends_on: ref('1_prefix_startwith_number_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 8b351420e27b9..e5873b93f6da7 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -19,17 +19,23 @@ if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is 
equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} - ) and {{ final_table_relation }}._airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_cdc_excluded')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -38,7 +44,7 @@ ","delete from _airbyte_test_normalization.dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_cdc_excluded_stg)"], tags = [ "top-level" ] ) }} --- depends on: ref('dedup_cdc_excluded_stg') +-- depends_on: ref('dedup_cdc_excluded_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 8ee9d8a0bd067..f0d0ff9e497be 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -19,17 +19,23 @@ if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- delete from 
final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) and {{ final_table_relation }}._airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -38,7 +44,7 @@ ","delete from _airbyte_test_normalization.dedup_exchange_rate_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_exchange_rate_stg)"], tags = [ "top-level" ] ) }} --- depends on: ref('dedup_exchange_rate_stg') +-- depends_on: ref('dedup_exchange_rate_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql index 7a091b64723f0..99667f0e34175 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql @@ -19,17 +19,23 @@ if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the 
left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('multiple_column_names_conflicts')) }} - ) and {{ final_table_relation }}._airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('multiple_column_names_conflicts')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('multiple_column_names_conflicts')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('multiple_column_names_conflicts')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -38,7 +44,7 @@ ","delete from _airbyte_test_normalization.multiple_column_names_conflicts_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.multiple_column_names_conflicts_stg)"], tags = [ "top-level" ] ) }} --- depends on: ref('multiple_column_names_conflicts_stg') +-- depends_on: ref('multiple_column_names_conflicts_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql index fa89120de0a47..5d1a03147fa05 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -19,17 +19,23 @@ if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join 
version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('pos_dedup_cdcx')) }} - ) and {{ final_table_relation }}._airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('pos_dedup_cdcx')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('pos_dedup_cdcx')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('pos_dedup_cdcx')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -38,7 +44,7 @@ ","delete from _airbyte_test_normalization.pos_dedup_cdcx_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.pos_dedup_cdcx_stg)"], tags = [ "top-level" ] ) }} --- depends on: ref('pos_dedup_cdcx_stg') +-- depends_on: ref('pos_dedup_cdcx_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 97850b8cf2fc0..0c2e80cd629e2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -19,17 +19,23 @@ if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- 
delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} - ) and {{ final_table_relation }}._airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('renamed_dedup_cdc_excluded')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -38,7 +44,7 @@ ","delete from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg)"], tags = [ "top-level" ] ) }} --- depends on: ref('renamed_dedup_cdc_excluded_stg') +-- depends_on: ref('renamed_dedup_cdc_excluded_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql new file mode 100644 index 0000000000000..99a03831a8ba8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql @@ -0,0 +1,22 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['name'], ['name']) }} as {{ adapter.quote('name') }}, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_lsn'], 
['_ab_cdc_lsn']) }} as _ab_cdc_lsn, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_deleted_at'], ['_ab_cdc_deleted_at']) }} as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} as table_alias +-- dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql new file mode 100644 index 0000000000000..3d8803e27a664 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql @@ -0,0 +1,22 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('dedup_cdc_excluded_ab1') }} +select + cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, + cast({{ adapter.quote('name') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('name') }}, + cast(_ab_cdc_lsn as {{ dbt_utils.type_float() }}) as _ab_cdc_lsn, + cast(_ab_cdc_updated_at as {{ dbt_utils.type_float() }}) as _ab_cdc_updated_at, + cast(_ab_cdc_deleted_at as {{ 
dbt_utils.type_float() }}) as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('dedup_cdc_excluded_ab1') }} +-- dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..e5873b93f6da7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -0,0 +1,162 @@ +{{ config( + indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_cdc_excluded' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select distinct _airbyte_unique_key + from {{ this }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_cdc_excluded')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_cdc_excluded_stg)"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('dedup_cdc_excluded_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_cdc_excluded_stg') }} + -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_cdc_excluded_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from new_data + union all + select 
{{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_cdc_excluded_stg') }} + -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_lsn as _airbyte_start_at, + lag(_ab_cdc_lsn) over ( + partition by {{ adapter.quote('id') }} + order by + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by {{ adapter.quote('id') }} + order by + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_cdc_excluded_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}) + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at', '_ab_cdc_deleted_at', '_ab_cdc_updated_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + {{ adapter.quote('id') }}, + {{ 
adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_cdc_excluded_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index a7c9045ed5330..461b04dd7d1d7 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -19,17 +19,23 @@ if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ 
final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) and {{ final_table_relation }}._airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -38,7 +44,7 @@ ","delete from _airbyte_test_normalization.dedup_exchange_rate_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_exchange_rate_stg)"], tags = [ "top-level" ] ) }} --- depends on: ref('dedup_exchange_rate_stg') +-- depends_on: ref('dedup_exchange_rate_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 6432771b55812..1da6ab0db1336 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -19,17 +19,23 @@ if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} - ) and {{ final_table_relation }}._airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('renamed_dedup_cdc_excluded')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -38,7 +44,7 @@ ","delete from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg)"], tags = [ "top-level" ] ) }} --- depends on: ref('renamed_dedup_cdc_excluded_stg') +-- depends_on: ref('renamed_dedup_cdc_excluded_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..32d70c680aa9d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('dedup_cdc_excluded_scd') }} +select + _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} 
as _airbyte_normalized_at, + _airbyte_dedup_cdc_excluded_hashid +from {{ ref('dedup_cdc_excluded_scd') }} +-- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql new file mode 100644 index 0000000000000..b0cd4bf7cb134 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql @@ -0,0 +1,22 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('dedup_cdc_excluded_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + adapter.quote('name'), + '_ab_cdc_lsn', + '_ab_cdc_updated_at', + '_ab_cdc_deleted_at', + ]) }} as _airbyte_dedup_cdc_excluded_hashid, + tmp.* +from {{ ref('dedup_cdc_excluded_ab2') }} tmp +-- dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/sources.yml 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/sources.yml index dd538a80131ae..79ad1a1bb5c5a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/sources.yml @@ -1,11 +1,12 @@ version: 2 sources: -- name: test_normalization - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_dedup_exchange_rate - - name: _airbyte_raw_exchange_rate - - name: _airbyte_raw_renamed_dedup_cdc_excluded + - name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_dedup_cdc_excluded + - name: _airbyte_raw_dedup_exchange_rate + - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..a1fba0a6d7ff4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."dedup_cdc_excluded_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "dedup_cdc_excluded_scd__dbt_tmp" + ); + + + insert into 
"postgres".test_normalization."dedup_cdc_excluded_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid" + from "dedup_cdc_excluded_scd__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..b3012059b462d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."dedup_cdc_excluded" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "dedup_cdc_excluded__dbt_tmp" + ); + + + insert into "postgres".test_normalization."dedup_cdc_excluded" ("_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid") + ( + select "_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", 
"_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid" + from "dedup_cdc_excluded__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql new file mode 100644 index 0000000000000..d9f833d441bfa --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."dedup_cdc_excluded_stg" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "dedup_cdc_excluded_stg__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."dedup_cdc_excluded_stg" ("_airbyte_dedup_cdc_excluded_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_dedup_cdc_excluded_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "dedup_cdc_excluded_stg__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index cb059ed9a244a..753b62319771c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -9,7 +9,7 @@ as ( --- depends on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') +-- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index 0a988fc89f480..4dd8d4778e8b2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -19,17 +19,23 @@ if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} - ) and {{ final_table_relation }}._airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -38,7 +44,7 @@ ","drop view _airbyte_test_normalization.nested_stream_with_complex_columns_resulting_into_long_names_stg"], tags = [ "top-level" ] ) }} --- depends on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') +-- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml index cfd8723edf5f4..06d2109d33564 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml @@ -65,3 +65,8 @@ vars: renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: 
test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 3a9b19b40246b..2d58288b13eac 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -9,7 +9,7 @@ as ( --- depends on: ref('dedup_exchange_rate_stg') +-- depends_on: ref('dedup_exchange_rate_stg') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 1fb7901421091..03cb6839b96dc 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -19,17 +19,23 @@ if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) and {{ final_table_relation }}._airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -38,7 +44,7 @@ ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends on: ref('dedup_exchange_rate_stg') +-- depends_on: ref('dedup_exchange_rate_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index a89d2f8e880c6..881c1b76c5563 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -19,17 +19,23 @@ if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 
1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select _airbyte_unique_key + select distinct _airbyte_unique_key from {{ this }} - where 1 = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) and {{ final_table_relation }}._airbyte_unique_key not in ( - select _airbyte_unique_key - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + left join ( + select _airbyte_unique_key as active_unique_key + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -38,7 +44,7 @@ ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} --- depends on: ref('dedup_exchange_rate_stg') +-- depends_on: ref('dedup_exchange_rate_stg') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/sources.yml index dd538a80131ae..79ad1a1bb5c5a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/sources.yml @@ -1,11 +1,12 @@ version: 2 sources: -- name: test_normalization - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_dedup_exchange_rate - - name: _airbyte_raw_exchange_rate - - name: _airbyte_raw_renamed_dedup_cdc_excluded + - name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_dedup_cdc_excluded + - name: _airbyte_raw_dedup_exchange_rate + - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql index e834f52085a30..b3072ce0004ca 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql @@ -3,7 +3,7 @@ create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" as (select * from( --- depends on: ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') +-- depends_on: ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') with input_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql index f7a69b5e7766f..47d7655dea993 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql @@ -19,17 +19,23 @@ if final_table_relation is not none and '_AIRBYTE_UNIQUE_KEY' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} where {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY in ( - select _AIRBYTE_UNIQUE_KEY + select distinct _AIRBYTE_UNIQUE_KEY from {{ this }} - where 1 = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} - ) and {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY not in ( - select _AIRBYTE_UNIQUE_KEY - from {{ this }} - where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} + left join ( + select _AIRBYTE_UNIQUE_KEY as active_unique_key + from {{ this }} + where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' 
+ adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} + ) active_recent_scd_rows on _AIRBYTE_UNIQUE_KEY = active_unique_key + where 1=1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} + group by _AIRBYTE_UNIQUE_KEY + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -38,7 +44,7 @@ ","drop view _AIRBYTE_TEST_NORMALIZATION.NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG"], tags = [ "top-level" ] ) }} --- depends on: ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') +-- depends_on: ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') with {% if is_incremental() %} new_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql index c7662fbaeef79..220cd093da41e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql @@ -3,7 +3,7 @@ create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD" as (select * from( --- depends on: ref('DEDUP_EXCHANGE_RATE_STG') +-- depends_on: ref('DEDUP_EXCHANGE_RATE_STG') with input_data as ( diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql index 5b23c8050a370..b634ac8aeaa4b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql @@ -19,17 +19,23 @@ if final_table_relation is not none and '_AIRBYTE_UNIQUE_KEY' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') %} -- Delete records which are no longer active: - -- The first subquery finds the most recent increment to the SCD table - -- The second subquery finds, within that increment, the records which are still active - -- We want to delete rows which are in that increment, but are not active + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) delete from {{ final_table_relation }} where {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY in ( - select _AIRBYTE_UNIQUE_KEY + select distinct _AIRBYTE_UNIQUE_KEY from {{ this }} - where 1 = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' 
+ adapter.quote('DEDUP_EXCHANGE_RATE')) }} - ) and {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY not in ( - select _AIRBYTE_UNIQUE_KEY - from {{ this }} - where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('DEDUP_EXCHANGE_RATE')) }} + left join ( + select _AIRBYTE_UNIQUE_KEY as active_unique_key + from {{ this }} + where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('DEDUP_EXCHANGE_RATE')) }} + ) active_recent_scd_rows on _AIRBYTE_UNIQUE_KEY = active_unique_key + where 1=1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('DEDUP_EXCHANGE_RATE')) }} + group by _AIRBYTE_UNIQUE_KEY + having count(active_unique_key) = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete @@ -38,7 +44,7 @@ ","drop view _AIRBYTE_TEST_NORMALIZATION.DEDUP_EXCHANGE_RATE_STG"], tags = [ "top-level" ] ) }} --- depends on: ref('DEDUP_EXCHANGE_RATE_STG') +-- depends_on: ref('DEDUP_EXCHANGE_RATE_STG') with {% if is_incremental() %} new_data as ( From 05566731a536ca783426fefedad8d623594fffbc Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 13 Jun 2022 14:13:53 -0700 Subject: [PATCH 39/43] better sql --- .../transform_catalog/stream_processor.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 2ba799fe82f38..895d1f37abc6c 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -1164,18 +1164,23 @@ def add_to_outputs( -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're 
incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. {{ delete_statement }} where {{ unique_key_reference }} in ( - select distinct {{ unique_key }} - from {{ '{{ this }}' }} - left join ( - select {{ unique_key }} as active_unique_key + select inactive_counts.unique_key + from ( + select {{ unique_key }} as unique_key, count({{ unique_key }}) as inactive_count + from {{ '{{ this }}' }} + where {{ active_row_column_name }} = 0 {{ normalized_at_incremental_clause }} + group by {{ unique_key }} + ) inactive_counts left join ( + select {{ unique_key }} as unique_key, count({{ unique_key }}) as active_count from {{ '{{ this }}' }} where {{ active_row_column_name }} = 1 {{ normalized_at_incremental_clause }} - ) active_recent_scd_rows on {{ unique_key }} = active_unique_key - where 1=1 {{ normalized_at_incremental_clause }} - group by {{ unique_key }} - having count(active_unique_key) = 0 - {{ clickhouse_nullable_join_setting }} + group by {{ unique_key }} + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {{ '{% else %}' }} -- We have to have a non-empty query, so just do a noop delete From c46495c3aa22f7c809035012cd441e18e3e54599 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 13 Jun 2022 14:14:13 -0700 Subject: [PATCH 40/43] regenerate output --- ..._columns_resulting_into_long_names_scd.sql | 22 +++++++++++------- .../dedup_exchange_rate_scd.sql | 22 +++++++++++------- .../dedup_exchange_rate_scd.sql | 22 +++++++++++------- .../dedup_cdc_excluded_scd.sql | 23 +++++++++++-------- .../dedup_exchange_rate_scd.sql | 23 +++++++++++-------- ..._stream_with_co_1g_into_long_names_scd.sql | 22 +++++++++++------- .../dedup_exchange_rate_scd.sql | 22 +++++++++++------- 
.../dedup_exchange_rate_scd.sql | 22 +++++++++++------- ...ream_with_c__lting_into_long_names_scd.sql | 22 +++++++++++------- .../some_stream_that_was_empty_scd.sql | 22 +++++++++++------- .../1_prefix_startwith_number_scd.sql | 22 +++++++++++------- .../dedup_cdc_excluded_scd.sql | 22 +++++++++++------- .../dedup_exchange_rate_scd.sql | 22 +++++++++++------- .../multiple_column_names_conflicts_scd.sql | 22 +++++++++++------- .../test_normalization/pos_dedup_cdcx_scd.sql | 22 +++++++++++------- .../renamed_dedup_cdc_excluded_scd.sql | 22 +++++++++++------- .../dedup_cdc_excluded_scd.sql | 22 +++++++++++------- .../dedup_exchange_rate_scd.sql | 22 +++++++++++------- .../renamed_dedup_cdc_excluded_scd.sql | 22 +++++++++++------- ..._columns_resulting_into_long_names_scd.sql | 22 +++++++++++------- .../dedup_exchange_rate_scd.sql | 22 +++++++++++------- .../dedup_exchange_rate_scd.sql | 22 +++++++++++------- ..._COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql | 22 +++++++++++------- .../DEDUP_EXCHANGE_RATE_SCD.sql | 22 +++++++++++------- 24 files changed, 336 insertions(+), 194 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index 6a3d01894d614..7b76df58b37d0 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -26,17 +26,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index bdd2e78031e15..aaf0c846bdd37 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -26,17 +26,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 851df325ae711..7bef54bc9932b 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -26,17 +26,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 2bbf52a1b2600..af5142602731b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -24,18 +24,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
alter table {{ final_table_relation }} delete where _airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_cdc_excluded')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_cdc_excluded')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_cdc_excluded')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 - SETTINGS join_use_nulls=1 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 48a1f27bce797..4b759e0bdd70d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -24,18 +24,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. alter table {{ final_table_relation }} delete where _airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_exchange_rate')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 - SETTINGS join_use_nulls=1 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index bec1d659bfcb8..899fac24bfd19 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -24,17 +24,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_co__lting_into_long_names')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 00c8ccbaeb48b..7e1a17fc3423b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -24,17 +24,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 2dab8cabfb4f7..b286d8e7dbfe9 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -24,17 +24,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}.{{ quote('_AIRBYTE_UNIQUE_KEY') }} in ( - select distinct {{ quote('_AIRBYTE_UNIQUE_KEY') }} - from {{ this }} - left join ( - select {{ quote('_AIRBYTE_UNIQUE_KEY') }} as active_unique_key + select inactive_counts.unique_key + from ( + select {{ quote('_AIRBYTE_UNIQUE_KEY') }} as unique_key, count({{ quote('_AIRBYTE_UNIQUE_KEY') }}) as inactive_count + from {{ this }} + where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 0 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), this.schema + '.' + quote('dedup_exchange_rate')) }} + group by {{ quote('_AIRBYTE_UNIQUE_KEY') }} + ) inactive_counts left join ( + select {{ quote('_AIRBYTE_UNIQUE_KEY') }} as unique_key, count({{ quote('_AIRBYTE_UNIQUE_KEY') }}) as active_count from {{ this }} where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), this.schema + '.' + quote('dedup_exchange_rate')) }} - ) active_recent_scd_rows on {{ quote('_AIRBYTE_UNIQUE_KEY') }} = active_unique_key - where 1=1 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), this.schema + '.' 
+ quote('dedup_exchange_rate')) }} - group by {{ quote('_AIRBYTE_UNIQUE_KEY') }} - having count(active_unique_key) = 0 + group by {{ quote('_AIRBYTE_UNIQUE_KEY') }} + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql index 75d10fb21e544..08ec2275c2c1a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql @@ -25,17 +25,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_c__lting_into_long_names')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_c__lting_into_long_names')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_c__lting_into_long_names')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql index 5b9715a46cc03..2654fe8c03213 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql @@ -25,17 +25,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('some_stream_that_was_empty')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('some_stream_that_was_empty')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('some_stream_that_was_empty')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql index f8b7865b3cfe8..a914b5cbabe9a 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql @@ -25,17 +25,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('1_prefix_startwith_number')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('1_prefix_startwith_number')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('1_prefix_startwith_number')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index e5873b93f6da7..d19aaa68d3f55 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -25,17 +25,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index f0d0ff9e497be..38ef6bf2cb1d3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -25,17 +25,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql index 99667f0e34175..984e0c38c921e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql @@ -25,17 +25,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('multiple_column_names_conflicts')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('multiple_column_names_conflicts')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('multiple_column_names_conflicts')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql index 5d1a03147fa05..4b4f20c7cb94a 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -25,17 +25,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('pos_dedup_cdcx')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('pos_dedup_cdcx')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('pos_dedup_cdcx')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 0c2e80cd629e2..6118f641c2e6d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -25,17 +25,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index e5873b93f6da7..d19aaa68d3f55 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -25,17 +25,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_cdc_excluded')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 461b04dd7d1d7..a2a4b74c37726 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -25,17 +25,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 1da6ab0db1336..95f300f06ea59 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -25,17 +25,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('renamed_dedup_cdc_excluded')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index 4dd8d4778e8b2..adaeb9bfcdad2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -25,17 +25,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 03cb6839b96dc..ecac7537743ae 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -25,17 +25,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 881c1b76c5563..ecdc6d8951a41 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -25,17 +25,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select distinct _airbyte_unique_key - from {{ this }} - left join ( - select _airbyte_unique_key as active_unique_key + select inactive_counts.unique_key + from ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count + from {{ this }} + where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) inactive_counts left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - ) active_recent_scd_rows on _airbyte_unique_key = active_unique_key - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - having count(active_unique_key) = 0 + group by _airbyte_unique_key + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql index 47d7655dea993..fc70e5631cefe 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql @@ -25,17 +25,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
delete from {{ final_table_relation }} where {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY in ( - select distinct _AIRBYTE_UNIQUE_KEY - from {{ this }} - left join ( - select _AIRBYTE_UNIQUE_KEY as active_unique_key + select inactive_counts.unique_key + from ( + select _AIRBYTE_UNIQUE_KEY as unique_key, count(_AIRBYTE_UNIQUE_KEY) as inactive_count + from {{ this }} + where _AIRBYTE_ACTIVE_ROW = 0 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} + group by _AIRBYTE_UNIQUE_KEY + ) inactive_counts left join ( + select _AIRBYTE_UNIQUE_KEY as unique_key, count(_AIRBYTE_UNIQUE_KEY) as active_count from {{ this }} where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} - ) active_recent_scd_rows on _AIRBYTE_UNIQUE_KEY = active_unique_key - where 1=1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' 
+ adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} - group by _AIRBYTE_UNIQUE_KEY - having count(active_unique_key) = 0 + group by _AIRBYTE_UNIQUE_KEY + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql index b634ac8aeaa4b..0a47ea898f682 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql @@ -25,17 +25,23 @@ -- ) and unique_key not in ( -- select unique_key from scd_table where active_row = 1 -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
delete from {{ final_table_relation }} where {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY in ( - select distinct _AIRBYTE_UNIQUE_KEY - from {{ this }} - left join ( - select _AIRBYTE_UNIQUE_KEY as active_unique_key + select inactive_counts.unique_key + from ( + select _AIRBYTE_UNIQUE_KEY as unique_key, count(_AIRBYTE_UNIQUE_KEY) as inactive_count + from {{ this }} + where _AIRBYTE_ACTIVE_ROW = 0 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('DEDUP_EXCHANGE_RATE')) }} + group by _AIRBYTE_UNIQUE_KEY + ) inactive_counts left join ( + select _AIRBYTE_UNIQUE_KEY as unique_key, count(_AIRBYTE_UNIQUE_KEY) as active_count from {{ this }} where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('DEDUP_EXCHANGE_RATE')) }} - ) active_recent_scd_rows on _AIRBYTE_UNIQUE_KEY = active_unique_key - where 1=1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('DEDUP_EXCHANGE_RATE')) }} - group by _AIRBYTE_UNIQUE_KEY - having count(active_unique_key) = 0 + group by _AIRBYTE_UNIQUE_KEY + ) active_counts on inactive_counts.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 ) {% else %} -- We have to have a non-empty query, so just do a noop delete From a47cf054aaa8ec32390654401ebbc24c22c9b262 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 13 Jun 2022 14:47:46 -0700 Subject: [PATCH 41/43] simplify query --- .../transform_catalog/stream_processor.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 895d1f37abc6c..56243634f7d05 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ 
b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -1168,18 +1168,19 @@ def add_to_outputs( -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. {{ delete_statement }} where {{ unique_key_reference }} in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select {{ unique_key }} as unique_key, count({{ unique_key }}) as inactive_count - from {{ '{{ this }}' }} - where {{ active_row_column_name }} = 0 {{ normalized_at_incremental_clause }} - group by {{ unique_key }} - ) inactive_counts left join ( - select {{ unique_key }} as unique_key, count({{ unique_key }}) as active_count - from {{ '{{ this }}' }} - where {{ active_row_column_name }} = 1 {{ normalized_at_incremental_clause }} - group by {{ unique_key }} - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct {{ unique_key }} as unique_key + from {{ '{{ this }}' }} + where 1=1 {{ normalized_at_incremental_clause }} + ) recent_records + left join ( + select {{ unique_key }} as unique_key, count({{ unique_key }}) as active_count + from {{ '{{ this }}' }} + where {{ active_row_column_name }} = 1 {{ normalized_at_incremental_clause }} + group by {{ unique_key }} + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {{ '{% else %}' }} From 087d556649d3bee1da38785d98b90a96dff03172 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 13 Jun 2022 18:29:38 -0700 Subject: [PATCH 42/43] regenerate output --- ..._columns_resulting_into_long_names_scd.sql | 23 ++++++++++--------- .../dedup_exchange_rate_scd.sql | 23 ++++++++++--------- .../dedup_exchange_rate_scd.sql | 23 ++++++++++--------- .../dedup_cdc_excluded_scd.sql | 23 ++++++++++--------- .../dedup_exchange_rate_scd.sql | 23 ++++++++++--------- 
..._stream_with_co_1g_into_long_names_scd.sql | 23 ++++++++++--------- .../dedup_exchange_rate_scd.sql | 23 ++++++++++--------- .../dedup_exchange_rate_scd.sql | 23 ++++++++++--------- ...ream_with_c__lting_into_long_names_scd.sql | 23 ++++++++++--------- .../some_stream_that_was_empty_scd.sql | 23 ++++++++++--------- .../1_prefix_startwith_number_scd.sql | 23 ++++++++++--------- .../dedup_cdc_excluded_scd.sql | 23 ++++++++++--------- .../dedup_exchange_rate_scd.sql | 23 ++++++++++--------- .../multiple_column_names_conflicts_scd.sql | 23 ++++++++++--------- .../test_normalization/pos_dedup_cdcx_scd.sql | 23 ++++++++++--------- .../renamed_dedup_cdc_excluded_scd.sql | 23 ++++++++++--------- .../dedup_cdc_excluded_scd.sql | 23 ++++++++++--------- .../dedup_exchange_rate_scd.sql | 23 ++++++++++--------- .../renamed_dedup_cdc_excluded_scd.sql | 23 ++++++++++--------- ..._columns_resulting_into_long_names_scd.sql | 23 ++++++++++--------- .../dedup_exchange_rate_scd.sql | 23 ++++++++++--------- .../dedup_exchange_rate_scd.sql | 23 ++++++++++--------- ..._COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql | 23 ++++++++++--------- .../DEDUP_EXCHANGE_RATE_SCD.sql | 23 ++++++++++--------- 24 files changed, 288 insertions(+), 264 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index 7b76df58b37d0..1df163184ca05 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -30,18 +30,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index aaf0c846bdd37..ce21bef8c7221 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -30,18 +30,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 7bef54bc9932b..4f6b80934992c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -30,18 +30,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index af5142602731b..f87d45a5c18cf 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ 
-28,18 +28,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. alter table {{ final_table_relation }} delete where _airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_cdc_excluded')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_cdc_excluded')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_cdc_excluded')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ quote('dedup_cdc_excluded')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 4b759e0bdd70d..bd834917f06b4 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -28,18 +28,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. alter table {{ final_table_relation }} delete where _airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index 899fac24bfd19..9ffb6bd5558cc 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -28,18 +28,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 7e1a17fc3423b..b1c2af62e4bf1 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -28,18 +28,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index b286d8e7dbfe9..9320dbc51f60f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -28,18 
+28,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}.{{ quote('_AIRBYTE_UNIQUE_KEY') }} in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select {{ quote('_AIRBYTE_UNIQUE_KEY') }} as unique_key, count({{ quote('_AIRBYTE_UNIQUE_KEY') }}) as inactive_count - from {{ this }} - where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 0 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), this.schema + '.' + quote('dedup_exchange_rate')) }} - group by {{ quote('_AIRBYTE_UNIQUE_KEY') }} - ) inactive_counts left join ( - select {{ quote('_AIRBYTE_UNIQUE_KEY') }} as unique_key, count({{ quote('_AIRBYTE_UNIQUE_KEY') }}) as active_count - from {{ this }} - where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), this.schema + '.' + quote('dedup_exchange_rate')) }} - group by {{ quote('_AIRBYTE_UNIQUE_KEY') }} - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct {{ quote('_AIRBYTE_UNIQUE_KEY') }} as unique_key + from {{ this }} + where 1=1 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), this.schema + '.' + quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select {{ quote('_AIRBYTE_UNIQUE_KEY') }} as unique_key, count({{ quote('_AIRBYTE_UNIQUE_KEY') }}) as active_count + from {{ this }} + where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), this.schema + '.' 
+ quote('dedup_exchange_rate')) }} + group by {{ quote('_AIRBYTE_UNIQUE_KEY') }} + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql index 08ec2275c2c1a..5eaf6186aaab4 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql @@ -29,18 +29,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_c__lting_into_long_names')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_c__lting_into_long_names')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_c__lting_into_long_names')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_c__lting_into_long_names')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql index 2654fe8c03213..c35233d432cb3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql @@ -29,18 +29,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('some_stream_that_was_empty')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('some_stream_that_was_empty')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('some_stream_that_was_empty')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('some_stream_that_was_empty')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql index a914b5cbabe9a..01e0c49d1c7c4 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql @@ -29,18 +29,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('1_prefix_startwith_number')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('1_prefix_startwith_number')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('1_prefix_startwith_number')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('1_prefix_startwith_number')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index d19aaa68d3f55..5affe9825e3be 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -29,18 +29,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_cdc_excluded')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 38ef6bf2cb1d3..ef0cf7e1e95f5 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -29,18 +29,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql index 984e0c38c921e..77d393c856892 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql @@ -29,18 +29,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('multiple_column_names_conflicts')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('multiple_column_names_conflicts')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('multiple_column_names_conflicts')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('multiple_column_names_conflicts')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql index 4b4f20c7cb94a..ff471c6abaab1 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -29,18 +29,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('pos_dedup_cdcx')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('pos_dedup_cdcx')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('pos_dedup_cdcx')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('pos_dedup_cdcx')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 6118f641c2e6d..d8da713c68711 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -29,18 +29,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('renamed_dedup_cdc_excluded')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index d19aaa68d3f55..5affe9825e3be 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -29,18 +29,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_cdc_excluded')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index a2a4b74c37726..7e6225fb7cfc4 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -29,18 +29,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 95f300f06ea59..96f720b3d2659 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -29,18 +29,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('renamed_dedup_cdc_excluded')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index adaeb9bfcdad2..627f56e3ad2ab 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -29,18 +29,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index ecac7537743ae..683191e161c56 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -29,18 +29,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index ecdc6d8951a41..2582b1213c704 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -29,18 +29,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as inactive_count - from {{ this }} - where _airbyte_active_row = 0 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) inactive_counts left join ( - select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count - from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} - group by _airbyte_unique_key - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql index fc70e5631cefe..7b46e390d0575 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql @@ -29,18 +29,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _AIRBYTE_UNIQUE_KEY as unique_key, count(_AIRBYTE_UNIQUE_KEY) as inactive_count - from {{ this }} - where _AIRBYTE_ACTIVE_ROW = 0 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' 
+ adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} - group by _AIRBYTE_UNIQUE_KEY - ) inactive_counts left join ( - select _AIRBYTE_UNIQUE_KEY as unique_key, count(_AIRBYTE_UNIQUE_KEY) as active_count - from {{ this }} - where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} - group by _AIRBYTE_UNIQUE_KEY - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _AIRBYTE_UNIQUE_KEY as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} + ) recent_records + left join ( + select _AIRBYTE_UNIQUE_KEY as unique_key, count(_AIRBYTE_UNIQUE_KEY) as active_count + from {{ this }} + where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' 
+ adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} + group by _AIRBYTE_UNIQUE_KEY + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql index 0a47ea898f682..13f4936015110 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql @@ -29,18 +29,19 @@ -- entries that were _updated_ recently. This is because a deleted record will have an SCD record -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. delete from {{ final_table_relation }} where {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY in ( - select inactive_counts.unique_key + select recent_records.unique_key from ( - select _AIRBYTE_UNIQUE_KEY as unique_key, count(_AIRBYTE_UNIQUE_KEY) as inactive_count - from {{ this }} - where _AIRBYTE_ACTIVE_ROW = 0 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' 
+ adapter.quote('DEDUP_EXCHANGE_RATE')) }} - group by _AIRBYTE_UNIQUE_KEY - ) inactive_counts left join ( - select _AIRBYTE_UNIQUE_KEY as unique_key, count(_AIRBYTE_UNIQUE_KEY) as active_count - from {{ this }} - where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('DEDUP_EXCHANGE_RATE')) }} - group by _AIRBYTE_UNIQUE_KEY - ) active_counts on inactive_counts.unique_key = active_counts.unique_key + select distinct _AIRBYTE_UNIQUE_KEY as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('DEDUP_EXCHANGE_RATE')) }} + ) recent_records + left join ( + select _AIRBYTE_UNIQUE_KEY as unique_key, count(_AIRBYTE_UNIQUE_KEY) as active_count + from {{ this }} + where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('DEDUP_EXCHANGE_RATE')) }} + group by _AIRBYTE_UNIQUE_KEY + ) active_counts + on recent_records.unique_key = active_counts.unique_key where active_count is null or active_count = 0 ) {% else %} From 9526c19a9e6d33eec669ec398a32094e20dddc57 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 14 Jun 2022 11:30:08 -0700 Subject: [PATCH 43/43] bump versions + changelog --- airbyte-integrations/bases/base-normalization/Dockerfile | 2 +- .../bases/base-normalization/snowflake.Dockerfile | 2 +- .../workers/normalization/NormalizationRunnerFactory.java | 2 +- docs/understanding-airbyte/basic-normalization.md | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/Dockerfile b/airbyte-integrations/bases/base-normalization/Dockerfile index 17ed8d98d9c2a..cce30a21f7bba 100644 --- a/airbyte-integrations/bases/base-normalization/Dockerfile +++ b/airbyte-integrations/bases/base-normalization/Dockerfile @@ -28,5 +28,5 @@ WORKDIR /airbyte ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" ENTRYPOINT ["/airbyte/entrypoint.sh"] -LABEL 
io.airbyte.version=0.2.3 +LABEL io.airbyte.version=0.2.4 LABEL io.airbyte.name=airbyte/normalization diff --git a/airbyte-integrations/bases/base-normalization/snowflake.Dockerfile b/airbyte-integrations/bases/base-normalization/snowflake.Dockerfile index 07f8206a0b369..209ec40ffb196 100644 --- a/airbyte-integrations/bases/base-normalization/snowflake.Dockerfile +++ b/airbyte-integrations/bases/base-normalization/snowflake.Dockerfile @@ -29,5 +29,5 @@ WORKDIR /airbyte ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" ENTRYPOINT ["/airbyte/entrypoint.sh"] -LABEL io.airbyte.version=0.2.3 +LABEL io.airbyte.version=0.2.4 LABEL io.airbyte.name=airbyte/normalization-snowflake diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java index 26f72f405320d..27d6ff6cd8fd1 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java @@ -14,7 +14,7 @@ public class NormalizationRunnerFactory { public static final String BASE_NORMALIZATION_IMAGE_NAME = "airbyte/normalization"; - public static final String NORMALIZATION_VERSION = "0.2.3"; + public static final String NORMALIZATION_VERSION = "0.2.4"; static final Map> NORMALIZATION_MAPPING = ImmutableMap.>builder() diff --git a/docs/understanding-airbyte/basic-normalization.md b/docs/understanding-airbyte/basic-normalization.md index 50f3a84e59a17..9f7af6ef5ba51 100644 --- a/docs/understanding-airbyte/basic-normalization.md +++ b/docs/understanding-airbyte/basic-normalization.md @@ -352,6 +352,7 @@ Therefore, in order to "upgrade" to the desired normalization version, you need | Airbyte Version | Normalization Version | Date | Pull Request | Subject | |:----------------| :--- | :--- | :--- | :--- | +| | 0.2.4 | 2022-06-14 | 
[\#12846](https://github.com/airbytehq/airbyte/pull/12846) | CDC correctly propagates deletions to final tables | | | 0.2.3 | 2022-06-10 | [\#11204](https://github.com/airbytehq/airbyte/pull/11204) | MySQL: add support for SSh tunneling | | | 0.2.2 | 2022-06-02 | [\#13289](https://github.com/airbytehq/airbyte/pull/13289) | BigQuery use `json_extract_string_array` for array of simple type elements | | | 0.2.1 | 2022-05-17 | [\#12924](https://github.com/airbytehq/airbyte/pull/12924) | Fixed checking --event-buffer-size on old dbt crashed entrypoint.sh |