airbytehq · ChristopheDuong · Jan 6, 2022 · Dec 22, 2021 · Dec 23, 2021 · Dec 23, 2021
diff --git a/airbyte-integrations/bases/base-normalization/.gitignore b/airbyte-integrations/bases/base-normalization/.gitignore
@@ -20,6 +20,7 @@ integration_tests/normalization_test_output/**/*.yml
 # Simple Streams
 !integration_tests/normalization_test_output/**/dedup_exchange_rate*.sql
 !integration_tests/normalization_test_output/**/exchange_rate.sql
+!integration_tests/normalization_test_output/**/test_simple_streams/first_output/airbyte_views/**/multiple_column_names_conflicts_stg.sql
 # Nested Streams
 # Parent table
 !integration_tests/normalization_test_output/**/nested_stream_with*_names_ab*.sql

diff --git a/...d/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/...d/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql
@@ -26,9 +26,9 @@ scd_data as (
 ), '')) as 
     string
 ))) as _airbyte_unique_key,
-        id,
-        date,
-        `partition`,
+      id,
+      date,
+      `partition`,
       date as _airbyte_start_at,
       lag(date) over (
         partition by id
@@ -54,7 +54,10 @@ dedup_data as (
         -- we need to ensure de-duplicated rows for merge/update queries
         -- additionally, we generate a unique key for the scd table
         row_number() over (
-            partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at
+            partition by
+                _airbyte_unique_key,
+                _airbyte_start_at,
+                _airbyte_emitted_at
             order by _airbyte_active_row desc, _airbyte_ab_id
         ) as _airbyte_row_num,
         to_hex(md5(cast(concat(coalesce(cast(_airbyte_unique_key as 
@@ -72,9 +75,9 @@ dedup_data as (
 select
     _airbyte_unique_key,
     _airbyte_unique_key_scd,
-        id,
-        date,
-        `partition`,
+    id,
+    date,
+    `partition`,
     _airbyte_start_at,
     _airbyte_end_at,
     _airbyte_active_row,

diff --git a/...d/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/...d/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql
@@ -57,11 +57,11 @@ scd_data as (
     -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key
     select
       {{ dbt_utils.surrogate_key([
-            'id',
+      'id',
       ]) }} as _airbyte_unique_key,
-        id,
-        date,
-        {{ adapter.quote('partition') }},
+      id,
+      date,
+      {{ adapter.quote('partition') }},
       date as _airbyte_start_at,
       lag(date) over (
         partition by id
@@ -87,7 +87,10 @@ dedup_data as (
         -- we need to ensure de-duplicated rows for merge/update queries
         -- additionally, we generate a unique key for the scd table
         row_number() over (
-            partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at
+            partition by
+                _airbyte_unique_key,
+                _airbyte_start_at,
+                _airbyte_emitted_at
             order by _airbyte_active_row desc, _airbyte_ab_id
         ) as _airbyte_row_num,
         {{ dbt_utils.surrogate_key([
@@ -101,9 +104,9 @@ dedup_data as (
 select
     _airbyte_unique_key,
     _airbyte_unique_key_scd,
-        id,
-        date,
-        {{ adapter.quote('partition') }},
+    id,
+    date,
+    {{ adapter.quote('partition') }},
     _airbyte_start_at,
     _airbyte_end_at,
     _airbyte_active_row,

diff --git a/...reams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/...reams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql
@@ -30,14 +30,14 @@ scd_data as (
 ), '')) as 
     string
 ))) as _airbyte_unique_key,
-        id,
-        currency,
-        date,
-        timestamp_col,
-        HKD_special___characters,
-        HKD_special___characters_1,
-        NZD,
-        USD,
+      id,
+      currency,
+      date,
+      timestamp_col,
+      HKD_special___characters,
+      HKD_special___characters_1,
+      NZD,
+      USD,
       date as _airbyte_start_at,
       lag(date) over (
         partition by id, currency, cast(NZD as 
@@ -67,7 +67,10 @@ dedup_data as (
         -- we need to ensure de-duplicated rows for merge/update queries
         -- additionally, we generate a unique key for the scd table
         row_number() over (
-            partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at
+            partition by
+                _airbyte_unique_key,
+                _airbyte_start_at,
+                _airbyte_emitted_at
             order by _airbyte_active_row desc, _airbyte_ab_id
         ) as _airbyte_row_num,
         to_hex(md5(cast(concat(coalesce(cast(_airbyte_unique_key as 
@@ -85,14 +88,14 @@ dedup_data as (
 select
     _airbyte_unique_key,
     _airbyte_unique_key_scd,
-        id,
-        currency,
-        date,
-        timestamp_col,
-        HKD_special___characters,
-        HKD_special___characters_1,
-        NZD,
-        USD,
+    id,
+    currency,
+    date,
+    timestamp_col,
+    HKD_special___characters,
+    HKD_special___characters_1,
+    NZD,
+    USD,
     _airbyte_start_at,
     _airbyte_end_at,
     _airbyte_active_row,

diff --git a/...ams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/...ams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql
@@ -0,0 +1,77 @@
+
+
+  create or replace view `dataline-integration-testing`._airbyte_test_normalization.`multiple_column_names_conflicts_stg`
+  OPTIONS()
+  as -- staging view built in three steps: extract JSON fields (ab1), cast them (ab2), then prepend a row hash
+with __dbt__cte__multiple_column_names_conflicts_ab1 as (
+
+-- SQL model that parses the JSON blob stored in the single _airbyte_data column and extracts each field into its own column, as described by the JSON Schema
+-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_multiple_column_names_conflicts
+select
+    json_extract_scalar(_airbyte_data, "$['id']") as id,
+    json_extract_scalar(_airbyte_data, "$['User Id']") as User_Id, -- first of four similarly named source keys; the next three get _1/_2/_3 suffixed aliases
+    json_extract_scalar(_airbyte_data, "$['user_id']") as user_id_1, -- NOTE(review): suffix presumably avoids a column-name collision with User_Id; confirm against the generator
+    json_extract_scalar(_airbyte_data, "$['User id']") as User_id_2,
+    json_extract_scalar(_airbyte_data, "$['user id']") as user_id_3,
+    json_extract_scalar(_airbyte_data, "$['UserId']") as UserId, -- no separator in the source key, so no suffix is applied here
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    CURRENT_TIMESTAMP() as _airbyte_normalized_at
+from `dataline-integration-testing`.test_normalization._airbyte_raw_multiple_column_names_conflicts as table_alias
+-- multiple_column_names_conflicts
+where 1 = 1 -- always-true predicate; filters nothing
+
+),  __dbt__cte__multiple_column_names_conflicts_ab2 as (
+
+-- SQL model that casts each extracted column to the SQL type derived from its JSON schema type
+-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1
+select -- id -> int64, User_Id -> string, the other four user-id variants -> float64
+    cast(id as 
+    int64
+) as id,
+    cast(User_Id as 
+    string
+) as User_Id,
+    cast(user_id_1 as 
+    float64
+) as user_id_1,
+    cast(User_id_2 as 
+    float64
+) as User_id_2,
+    cast(user_id_3 as 
+    float64
+) as user_id_3,
+    cast(UserId as 
+    float64
+) as UserId,
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    CURRENT_TIMESTAMP() as _airbyte_normalized_at
+from __dbt__cte__multiple_column_names_conflicts_ab1
+-- multiple_column_names_conflicts
+where 1 = 1 -- always-true predicate; filters nothing
+
+)-- SQL model that adds a hash column computed from the values of this record
+-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2
+select -- hash id = hex-encoded md5 of the six data columns, each cast to string, nulls replaced by '', joined with '-'
+    to_hex(md5(cast(concat(coalesce(cast(id as 
+    string
+), ''), '-', coalesce(cast(User_Id as 
+    string
+), ''), '-', coalesce(cast(user_id_1 as 
+    string
+), ''), '-', coalesce(cast(User_id_2 as 
+    string
+), ''), '-', coalesce(cast(user_id_3 as 
+    string
+), ''), '-', coalesce(cast(UserId as 
+    string
+), '')) as 
+    string
+))) as _airbyte_multiple_column_names_conflicts_hashid,
+    tmp.* -- every column of the ab2 CTE passes through unchanged after the hash column
+from __dbt__cte__multiple_column_names_conflicts_ab2 tmp
+-- multiple_column_names_conflicts
+where 1 = 1 -- always-true predicate; filters nothing
+;
+
diff --git a/...s/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/...s/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql
@@ -59,18 +59,18 @@ scd_data as (
     -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key
     select
       {{ dbt_utils.surrogate_key([
-            'id',
-            'currency',
-            'NZD',
+      'id',
+      'currency',
+      'NZD',
       ]) }} as _airbyte_unique_key,
-        id,
-        currency,
-        date,
-        timestamp_col,
-        HKD_special___characters,
-        HKD_special___characters_1,
-        NZD,
-        USD,
+      id,
+      currency,
+      date,
+      timestamp_col,
+      HKD_special___characters,
+      HKD_special___characters_1,
+      NZD,
+      USD,
       date as _airbyte_start_at,
       lag(date) over (
         partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }})
@@ -96,7 +96,10 @@ dedup_data as (
         -- we need to ensure de-duplicated rows for merge/update queries
         -- additionally, we generate a unique key for the scd table
         row_number() over (
-            partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at
+            partition by
+                _airbyte_unique_key,
+                _airbyte_start_at,
+                _airbyte_emitted_at
             order by _airbyte_active_row desc, _airbyte_ab_id
         ) as _airbyte_row_num,
         {{ dbt_utils.surrogate_key([
@@ -110,14 +113,14 @@ dedup_data as (
 select
     _airbyte_unique_key,
     _airbyte_unique_key_scd,
-        id,
-        currency,
-        date,
-        timestamp_col,
-        HKD_special___characters,
-        HKD_special___characters_1,
-        NZD,
-        USD,
+    id,
+    currency,
+    date,
+    timestamp_col,
+    HKD_special___characters,
+    HKD_special___characters_1,
+    NZD,
+    USD,
     _airbyte_start_at,
     _airbyte_end_at,
     _airbyte_active_row,

diff --git a/...tests/normalization_test_output/bigquery/test_simple_streams/models/generated/sources.yml b/...tests/normalization_test_output/bigquery/test_simple_streams/models/generated/sources.yml
@@ -9,5 +9,6 @@ sources:
   - name: _airbyte_raw_dedup_cdc_excluded
   - name: _airbyte_raw_dedup_exchange_rate
   - name: _airbyte_raw_exchange_rate
+  - name: _airbyte_raw_multiple_column_names_conflicts
   - name: _airbyte_raw_pos_dedup_cdcx
   - name: _airbyte_raw_renamed_dedup_cdc_excluded
diff --git a/...d_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/...d_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql
@@ -59,18 +59,18 @@ scd_data as (
     -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key
     select
       {{ dbt_utils.surrogate_key([
-            'id',
-            'currency',
-            'NZD',
+      'id',
+      'currency',
+      'NZD',
       ]) }} as _airbyte_unique_key,
-        id,
-        currency,
-        new_column,
-        date,
-        timestamp_col,
-        HKD_special___characters,
-        NZD,
-        USD,
+      id,
+      currency,
+      new_column,
+      date,
+      timestamp_col,
+      HKD_special___characters,
+      NZD,
+      USD,
       date as _airbyte_start_at,
       lag(date) over (
         partition by cast(id as {{ dbt_utils.type_string() }}), currency, cast(NZD as {{ dbt_utils.type_string() }})
@@ -96,7 +96,10 @@ dedup_data as (
         -- we need to ensure de-duplicated rows for merge/update queries
         -- additionally, we generate a unique key for the scd table
         row_number() over (
-            partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at
+            partition by
+                _airbyte_unique_key,
+                _airbyte_start_at,
+                _airbyte_emitted_at
             order by _airbyte_active_row desc, _airbyte_ab_id
         ) as _airbyte_row_num,
         {{ dbt_utils.surrogate_key([
@@ -110,14 +113,14 @@ dedup_data as (
 select
     _airbyte_unique_key,
     _airbyte_unique_key_scd,
-        id,
-        currency,
-        new_column,
-        date,
-        timestamp_col,
-        HKD_special___characters,
-        NZD,
-        USD,
+    id,
+    currency,
+    new_column,
+    date,
+    timestamp_col,
+    HKD_special___characters,
+    NZD,
+    USD,
     _airbyte_start_at,
     _airbyte_end_at,
     _airbyte_active_row,