From ffe076cc7afa5134ad9f66f6e9a85870b5327c23 Mon Sep 17 00:00:00 2001 From: Jimmy Ma Date: Fri, 2 Dec 2022 11:26:25 -0800 Subject: [PATCH 01/11] Add Airbyte Protocol V1 support. --- .../AirbyteMessageVersionedMigrator.java | 2 +- .../migrations/AirbyteMessageMigrationV0.java | 3 +- .../migrations/AirbyteMessageMigrationV1.java | 38 +++++++++++++++++++ .../serde/AirbyteMessageV1Deserializer.java | 18 +++++++++ .../serde/AirbyteMessageV1Serializer.java | 18 +++++++++ ...byteMessageSerDeProviderMicronautTest.java | 2 +- ...VersionedAirbyteMessageBufferedWriter.java | 9 +---- .../VersionedAirbyteStreamFactory.java | 10 +---- .../java/io/airbyte/config/EnvConfigs.java | 8 ++-- 9 files changed, 84 insertions(+), 24 deletions(-) create mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV1.java create mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1Deserializer.java create mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1Serializer.java diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteMessageVersionedMigrator.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteMessageVersionedMigrator.java index 7514ca28fc5d6..c421777c03eb2 100644 --- a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteMessageVersionedMigrator.java +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteMessageVersionedMigrator.java @@ -5,7 +5,7 @@ package io.airbyte.commons.protocol; import io.airbyte.commons.version.Version; -import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteMessage; /** * Wraps message migration from a fixed version to the most recent version diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV0.java 
b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV0.java index 9306cf21e7528..a740a4df4fcbc 100644 --- a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV0.java +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV0.java @@ -7,13 +7,12 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.commons.version.Version; import io.airbyte.protocol.models.AirbyteMessage; -import jakarta.inject.Singleton; /** * Demo migration to illustrate the template. This should be deleted once we added the v0 to v1 * migration. */ -@Singleton +@Deprecated public class AirbyteMessageMigrationV0 implements AirbyteMessageMigration { diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV1.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV1.java new file mode 100644 index 0000000000000..f0c6293728893 --- /dev/null +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV1.java @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.commons.protocol.migrations; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.version.Version; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import jakarta.inject.Singleton; + +/** + * Placeholder AirbyteMessage Migration from v0 to v1 + */ +@Singleton +public class AirbyteMessageMigrationV1 implements AirbyteMessageMigration { + + @Override + public AirbyteMessage downgrade(io.airbyte.protocol.models.AirbyteMessage message) { + return Jsons.object(Jsons.jsonNode(message), AirbyteMessage.class); + } + + @Override + public io.airbyte.protocol.models.AirbyteMessage upgrade(AirbyteMessage message) { + return Jsons.object(Jsons.jsonNode(message), io.airbyte.protocol.models.AirbyteMessage.class); + } + + @Override + public Version getPreviousVersion() { + return new Version("0.0.0"); + } + + @Override + public Version getCurrentVersion() { + return new Version("1.0.0"); + } + +} diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1Deserializer.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1Deserializer.java new file mode 100644 index 0000000000000..968ce9a45b897 --- /dev/null +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1Deserializer.java @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.commons.protocol.serde; + +import io.airbyte.commons.version.AirbyteVersion; +import io.airbyte.protocol.models.AirbyteMessage; +import jakarta.inject.Singleton; + +@Singleton +public class AirbyteMessageV1Deserializer extends AirbyteMessageGenericDeserializer { + + public AirbyteMessageV1Deserializer() { + super(new AirbyteVersion("1.0.0"), AirbyteMessage.class); + } + +} diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1Serializer.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1Serializer.java new file mode 100644 index 0000000000000..b3e4a84ef116b --- /dev/null +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1Serializer.java @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.commons.protocol.serde; + +import io.airbyte.commons.version.AirbyteVersion; +import io.airbyte.protocol.models.AirbyteMessage; +import jakarta.inject.Singleton; + +@Singleton +public class AirbyteMessageV1Serializer extends AirbyteMessageGenericSerializer { + + public AirbyteMessageV1Serializer() { + super(new AirbyteVersion("1.0.0")); + } + +} diff --git a/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/AirbyteMessageSerDeProviderMicronautTest.java b/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/AirbyteMessageSerDeProviderMicronautTest.java index ccab4503aa1db..6bae9ba4b0b07 100644 --- a/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/AirbyteMessageSerDeProviderMicronautTest.java +++ b/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/AirbyteMessageSerDeProviderMicronautTest.java @@ -22,7 +22,7 @@ class AirbyteMessageSerDeProviderMicronautTest { @Test void testSerDeInjection() { // This should contain the list of all the supported majors of the airbyte protocol - final Set expectedVersions 
= new HashSet<>(List.of("0")); + final Set expectedVersions = new HashSet<>(List.of("0", "1")); assertEquals(expectedVersions, serDeProvider.getDeserializerKeys()); assertEquals(expectedVersions, serDeProvider.getSerializerKeys()); diff --git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteMessageBufferedWriter.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteMessageBufferedWriter.java index 9463b89e8ae52..e1b9b25a4b92e 100644 --- a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteMessageBufferedWriter.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteMessageBufferedWriter.java @@ -4,7 +4,6 @@ package io.airbyte.workers.internal; -import io.airbyte.commons.json.Jsons; import io.airbyte.commons.protocol.AirbyteMessageVersionedMigrator; import io.airbyte.commons.protocol.serde.AirbyteMessageSerializer; import io.airbyte.protocol.models.AirbyteMessage; @@ -26,15 +25,9 @@ public VersionedAirbyteMessageBufferedWriter(final BufferedWriter writer, @Override public void write(final AirbyteMessage message) throws IOException { - final T downgradedMessage = migrator.downgrade(convert(message)); + final T downgradedMessage = migrator.downgrade(message); writer.write(serializer.serialize(downgradedMessage)); writer.newLine(); } - // TODO remove this conversion once we migrated default AirbyteMessage to be from a versioned - // namespace - private io.airbyte.protocol.models.v0.AirbyteMessage convert(final AirbyteMessage message) { - return Jsons.object(Jsons.jsonNode(message), io.airbyte.protocol.models.v0.AirbyteMessage.class); - } - } diff --git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactory.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactory.java index fe4a88d566053..2c829415cccca 100644 --- 
a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactory.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactory.java @@ -164,18 +164,12 @@ final protected void initializeForProtocolVersion(final Version protocolVersion) @Override protected Stream toAirbyteMessage(final JsonNode json) { try { - final io.airbyte.protocol.models.v0.AirbyteMessage message = migrator.upgrade(deserializer.deserialize(json)); - return Stream.of(convert(message)); + final AirbyteMessage message = migrator.upgrade(deserializer.deserialize(json)); + return Stream.of(message); } catch (final RuntimeException e) { logger.warn("Failed to upgrade a message from version {}: {}", protocolVersion, Jsons.serialize(json), e); return Stream.empty(); } } - // TODO remove this conversion once we migrated default AirbyteMessage to be from a versioned - // namespace - private AirbyteMessage convert(final io.airbyte.protocol.models.v0.AirbyteMessage message) { - return Jsons.object(Jsons.jsonNode(message), AirbyteMessage.class); - } - } diff --git a/airbyte-config/config-models/src/main/java/io/airbyte/config/EnvConfigs.java b/airbyte-config/config-models/src/main/java/io/airbyte/config/EnvConfigs.java index ef3d7b6750d62..f9b4e4bb52189 100644 --- a/airbyte-config/config-models/src/main/java/io/airbyte/config/EnvConfigs.java +++ b/airbyte-config/config-models/src/main/java/io/airbyte/config/EnvConfigs.java @@ -42,8 +42,6 @@ public class EnvConfigs implements Configs { // env variable names public static final String AIRBYTE_ROLE = "AIRBYTE_ROLE"; public static final String AIRBYTE_VERSION = "AIRBYTE_VERSION"; - public static final String AIRBYTE_PROTOCOL_VERSION_MAX = "AIRBYTE_PROTOCOL_VERSION_MAX"; - public static final String AIRBYTE_PROTOCOL_VERSION_MIN = "AIRBYTE_PROTOCOL_VERSION_MIN"; public static final String INTERNAL_API_HOST = "INTERNAL_API_HOST"; public static final String AIRBYTE_API_AUTH_HEADER_NAME = 
"AIRBYTE_API_AUTH_HEADER_NAME"; public static final String AIRBYTE_API_AUTH_HEADER_VALUE = "AIRBYTE_API_AUTH_HEADER_VALUE"; @@ -202,6 +200,8 @@ public class EnvConfigs implements Configs { private static final long DEFAULT_MAX_SYNC_WORKERS = 5; private static final long DEFAULT_MAX_NOTIFY_WORKERS = 5; private static final String DEFAULT_NETWORK = "host"; + private static final Version DEFAULT_AIRBYTE_PROTOCOL_VERSION_MAX = new Version("1.0.0"); + private static final Version DEFAULT_AIRBYTE_PROTOCOL_VERSION_MIN = new Version("0.0.0"); private static final String AUTO_DETECT_SCHEMA = "AUTO_DETECT_SCHEMA"; public static final Map> JOB_SHARED_ENVS = Map.of( @@ -296,12 +296,12 @@ public AirbyteVersion getAirbyteVersion() { @Override public Version getAirbyteProtocolVersionMax() { - return new Version(getEnvOrDefault(AIRBYTE_PROTOCOL_VERSION_MAX, "0.3.0")); + return DEFAULT_AIRBYTE_PROTOCOL_VERSION_MAX; } @Override public Version getAirbyteProtocolVersionMin() { - return new Version(getEnvOrDefault(AIRBYTE_PROTOCOL_VERSION_MIN, "0.0.0")); + return DEFAULT_AIRBYTE_PROTOCOL_VERSION_MIN; } @Override From 06129046b12355084eb6bd8b3054208ff8f14a9e Mon Sep 17 00:00:00 2001 From: Jimmy Ma Date: Fri, 2 Dec 2022 13:30:20 -0800 Subject: [PATCH 02/11] Fix VersionedAirbyteStreamFactoryTest --- .../internal/VersionedAirbyteStreamFactoryTest.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactoryTest.java b/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactoryTest.java index f6ac35de3a54f..fb3d25a523538 100644 --- a/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactoryTest.java +++ b/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactoryTest.java @@ -12,8 +12,11 @@ import io.airbyte.commons.protocol.AirbyteMessageSerDeProvider; import 
io.airbyte.commons.protocol.AirbyteMessageVersionedMigratorFactory; import io.airbyte.commons.protocol.migrations.AirbyteMessageMigrationV0; +import io.airbyte.commons.protocol.migrations.AirbyteMessageMigrationV1; import io.airbyte.commons.protocol.serde.AirbyteMessageV0Deserializer; import io.airbyte.commons.protocol.serde.AirbyteMessageV0Serializer; +import io.airbyte.commons.protocol.serde.AirbyteMessageV1Deserializer; +import io.airbyte.commons.protocol.serde.AirbyteMessageV1Serializer; import io.airbyte.commons.version.Version; import io.airbyte.protocol.models.AirbyteMessage; import java.io.BufferedReader; @@ -36,11 +39,11 @@ class VersionedAirbyteStreamFactoryTest { @BeforeEach void beforeEach() { serDeProvider = spy(new AirbyteMessageSerDeProvider( - List.of(new AirbyteMessageV0Deserializer()), - List.of(new AirbyteMessageV0Serializer()))); + List.of(new AirbyteMessageV0Deserializer(), new AirbyteMessageV1Deserializer()), + List.of(new AirbyteMessageV0Serializer(), new AirbyteMessageV1Serializer()))); serDeProvider.initialize(); final AirbyteMessageMigrator migrator = new AirbyteMessageMigrator( - List.of(new AirbyteMessageMigrationV0())); + List.of(new AirbyteMessageMigrationV1())); migrator.initialize(); migratorFactory = spy(new AirbyteMessageVersionedMigratorFactory(migrator)); } From 7bbb282a5439a803a6867d67243573c0d9c10291 Mon Sep 17 00:00:00 2001 From: Jimmy Ma Date: Fri, 2 Dec 2022 13:33:29 -0800 Subject: [PATCH 03/11] Remove AirbyteMessageMigrationV0 example --- .../migrations/AirbyteMessageMigrationV0.java | 43 ------------ .../migrations/DefaultToV0MigrationTest.java | 67 ------------------- .../VersionedAirbyteStreamFactoryTest.java | 1 - 3 files changed, 111 deletions(-) delete mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV0.java delete mode 100644 airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/migrations/DefaultToV0MigrationTest.java diff --git 
a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV0.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV0.java deleted file mode 100644 index a740a4df4fcbc..0000000000000 --- a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV0.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.commons.protocol.migrations; - -import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.version.Version; -import io.airbyte.protocol.models.AirbyteMessage; - -/** - * Demo migration to illustrate the template. This should be deleted once we added the v0 to v1 - * migration. - */ -@Deprecated -public class AirbyteMessageMigrationV0 - implements AirbyteMessageMigration { - - @Override - public io.airbyte.protocol.models.v0.AirbyteMessage upgrade(final io.airbyte.protocol.models.AirbyteMessage oldMessage) { - final io.airbyte.protocol.models.v0.AirbyteMessage newMessage = - Jsons.object(Jsons.jsonNode(oldMessage), io.airbyte.protocol.models.v0.AirbyteMessage.class); - return newMessage; - } - - @Override - public io.airbyte.protocol.models.AirbyteMessage downgrade(final io.airbyte.protocol.models.v0.AirbyteMessage newMessage) { - final io.airbyte.protocol.models.AirbyteMessage oldMessage = - Jsons.object(Jsons.jsonNode(newMessage), io.airbyte.protocol.models.AirbyteMessage.class); - return oldMessage; - } - - @Override - public Version getPreviousVersion() { - return new Version("0.2.0"); - } - - @Override - public Version getCurrentVersion() { - return new Version("0.2.0"); - } - -} diff --git a/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/migrations/DefaultToV0MigrationTest.java b/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/migrations/DefaultToV0MigrationTest.java deleted file mode 100644 index 
a3b0d3eb58f23..0000000000000 --- a/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/migrations/DefaultToV0MigrationTest.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.commons.protocol.migrations; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.net.URI; -import lombok.SneakyThrows; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -class DefaultToV0MigrationTest { - - AirbyteMessageMigrationV0 v0migration; - - @BeforeEach - void beforeEach() { - v0migration = new AirbyteMessageMigrationV0(); - } - - @Test - void testVersionMetadata() { - assertEquals("0", v0migration.getPreviousVersion().getMajorVersion()); - assertEquals("0", v0migration.getCurrentVersion().getMajorVersion()); - } - - @Test - void testDowngrade() { - final io.airbyte.protocol.models.v0.AirbyteMessage v0Message = getV0Message(); - - final io.airbyte.protocol.models.AirbyteMessage downgradedMessage = v0migration.downgrade(v0Message); - final io.airbyte.protocol.models.AirbyteMessage expectedMessage = getUnversionedMessage(); - assertEquals(expectedMessage, downgradedMessage); - } - - @Test - void testUpgrade() { - final io.airbyte.protocol.models.AirbyteMessage unversionedMessage = getUnversionedMessage(); - - final io.airbyte.protocol.models.v0.AirbyteMessage upgradedMessage = v0migration.upgrade(unversionedMessage); - final io.airbyte.protocol.models.v0.AirbyteMessage expectedMessage = getV0Message(); - assertEquals(expectedMessage, upgradedMessage); - } - - @SneakyThrows - private io.airbyte.protocol.models.v0.AirbyteMessage getV0Message() { - return new io.airbyte.protocol.models.v0.AirbyteMessage() - .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.SPEC) - .withSpec( - new io.airbyte.protocol.models.v0.ConnectorSpecification() - .withProtocolVersion("0.3.0") - .withDocumentationUrl(new URI("file:///tmp/doc"))); - } - - 
@SneakyThrows - private io.airbyte.protocol.models.AirbyteMessage getUnversionedMessage() { - return new io.airbyte.protocol.models.AirbyteMessage() - .withType(io.airbyte.protocol.models.AirbyteMessage.Type.SPEC) - .withSpec( - new io.airbyte.protocol.models.ConnectorSpecification() - .withProtocolVersion("0.3.0") - .withDocumentationUrl(new URI("file:///tmp/doc"))); - } - -} diff --git a/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactoryTest.java b/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactoryTest.java index fb3d25a523538..b692938e84e0c 100644 --- a/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactoryTest.java +++ b/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactoryTest.java @@ -11,7 +11,6 @@ import io.airbyte.commons.protocol.AirbyteMessageMigrator; import io.airbyte.commons.protocol.AirbyteMessageSerDeProvider; import io.airbyte.commons.protocol.AirbyteMessageVersionedMigratorFactory; -import io.airbyte.commons.protocol.migrations.AirbyteMessageMigrationV0; import io.airbyte.commons.protocol.migrations.AirbyteMessageMigrationV1; import io.airbyte.commons.protocol.serde.AirbyteMessageV0Deserializer; import io.airbyte.commons.protocol.serde.AirbyteMessageV0Serializer; From 7b4379e9bee0e083338620c0210d3feefc945803 Mon Sep 17 00:00:00 2001 From: Jimmy Ma Date: Fri, 2 Dec 2022 13:47:28 -0800 Subject: [PATCH 04/11] Add Protocol Version constants --- .../protocol/migrations/AirbyteMessageMigrationV1.java | 5 +++-- .../protocol/serde/AirbyteMessageGenericDeserializer.java | 6 +++--- .../protocol/serde/AirbyteMessageGenericSerializer.java | 4 ++-- .../protocol/serde/AirbyteMessageV0Deserializer.java | 4 ++-- .../commons/protocol/serde/AirbyteMessageV0Serializer.java | 4 ++-- .../protocol/serde/AirbyteMessageV1Deserializer.java | 4 ++-- .../commons/protocol/serde/AirbyteMessageV1Serializer.java 
| 4 ++-- .../io/airbyte/commons/version/AirbyteProtocolVersion.java | 2 ++ 8 files changed, 18 insertions(+), 15 deletions(-) diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV1.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV1.java index f0c6293728893..501fca10c464c 100644 --- a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV1.java +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV1.java @@ -5,6 +5,7 @@ package io.airbyte.commons.protocol.migrations; import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.version.AirbyteProtocolVersion; import io.airbyte.commons.version.Version; import io.airbyte.protocol.models.v0.AirbyteMessage; import jakarta.inject.Singleton; @@ -27,12 +28,12 @@ public io.airbyte.protocol.models.AirbyteMessage upgrade(AirbyteMessage message) @Override public Version getPreviousVersion() { - return new Version("0.0.0"); + return AirbyteProtocolVersion.V0; } @Override public Version getCurrentVersion() { - return new Version("1.0.0"); + return AirbyteProtocolVersion.V1; } } diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageGenericDeserializer.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageGenericDeserializer.java index 31a05f748fb33..660097c2fd2f2 100644 --- a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageGenericDeserializer.java +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageGenericDeserializer.java @@ -6,16 +6,16 @@ import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.version.AirbyteVersion; +import io.airbyte.commons.version.Version; import lombok.Getter; 
public class AirbyteMessageGenericDeserializer implements AirbyteMessageDeserializer { @Getter - final AirbyteVersion targetVersion; + final Version targetVersion; final Class typeClass; - public AirbyteMessageGenericDeserializer(final AirbyteVersion targetVersion, final Class typeClass) { + public AirbyteMessageGenericDeserializer(final Version targetVersion, final Class typeClass) { this.targetVersion = targetVersion; this.typeClass = typeClass; } diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageGenericSerializer.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageGenericSerializer.java index d3e7b251c0d66..e5a23288db362 100644 --- a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageGenericSerializer.java +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageGenericSerializer.java @@ -5,7 +5,7 @@ package io.airbyte.commons.protocol.serde; import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.version.AirbyteVersion; +import io.airbyte.commons.version.Version; import lombok.AllArgsConstructor; import lombok.Getter; @@ -13,7 +13,7 @@ public class AirbyteMessageGenericSerializer implements AirbyteMessageSerializer { @Getter - private final AirbyteVersion targetVersion; + private final Version targetVersion; @Override public String serialize(T message) { diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV0Deserializer.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV0Deserializer.java index 198fbff83cff0..f280c508e4177 100644 --- a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV0Deserializer.java +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV0Deserializer.java @@ -4,7 +4,7 @@ package 
io.airbyte.commons.protocol.serde; -import io.airbyte.commons.version.AirbyteVersion; +import io.airbyte.commons.version.AirbyteProtocolVersion; import io.airbyte.protocol.models.v0.AirbyteMessage; import jakarta.inject.Singleton; @@ -12,7 +12,7 @@ public class AirbyteMessageV0Deserializer extends AirbyteMessageGenericDeserializer { public AirbyteMessageV0Deserializer() { - super(new AirbyteVersion("0.3.0"), AirbyteMessage.class); + super(AirbyteProtocolVersion.V0, AirbyteMessage.class); } } diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV0Serializer.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV0Serializer.java index d70442065cdb0..f68ce7be46a4e 100644 --- a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV0Serializer.java +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV0Serializer.java @@ -4,7 +4,7 @@ package io.airbyte.commons.protocol.serde; -import io.airbyte.commons.version.AirbyteVersion; +import io.airbyte.commons.version.AirbyteProtocolVersion; import io.airbyte.protocol.models.v0.AirbyteMessage; import jakarta.inject.Singleton; @@ -12,7 +12,7 @@ public class AirbyteMessageV0Serializer extends AirbyteMessageGenericSerializer { public AirbyteMessageV0Serializer() { - super(new AirbyteVersion("0.3.0")); + super(AirbyteProtocolVersion.V0); } } diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1Deserializer.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1Deserializer.java index 968ce9a45b897..8b1785c64ff6c 100644 --- a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1Deserializer.java +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1Deserializer.java @@ -4,7 +4,7 @@ package 
io.airbyte.commons.protocol.serde; -import io.airbyte.commons.version.AirbyteVersion; +import io.airbyte.commons.version.AirbyteProtocolVersion; import io.airbyte.protocol.models.AirbyteMessage; import jakarta.inject.Singleton; @@ -12,7 +12,7 @@ public class AirbyteMessageV1Deserializer extends AirbyteMessageGenericDeserializer { public AirbyteMessageV1Deserializer() { - super(new AirbyteVersion("1.0.0"), AirbyteMessage.class); + super(AirbyteProtocolVersion.V1, AirbyteMessage.class); } } diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1Serializer.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1Serializer.java index b3e4a84ef116b..fba6dd5d030b5 100644 --- a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1Serializer.java +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1Serializer.java @@ -4,7 +4,7 @@ package io.airbyte.commons.protocol.serde; -import io.airbyte.commons.version.AirbyteVersion; +import io.airbyte.commons.version.AirbyteProtocolVersion; import io.airbyte.protocol.models.AirbyteMessage; import jakarta.inject.Singleton; @@ -12,7 +12,7 @@ public class AirbyteMessageV1Serializer extends AirbyteMessageGenericSerializer { public AirbyteMessageV1Serializer() { - super(new AirbyteVersion("1.0.0")); + super(AirbyteProtocolVersion.V1); } } diff --git a/airbyte-commons/src/main/java/io/airbyte/commons/version/AirbyteProtocolVersion.java b/airbyte-commons/src/main/java/io/airbyte/commons/version/AirbyteProtocolVersion.java index 27543ba3cd2ce..b587f0235cf94 100644 --- a/airbyte-commons/src/main/java/io/airbyte/commons/version/AirbyteProtocolVersion.java +++ b/airbyte-commons/src/main/java/io/airbyte/commons/version/AirbyteProtocolVersion.java @@ -7,6 +7,8 @@ public class AirbyteProtocolVersion { public final static Version DEFAULT_AIRBYTE_PROTOCOL_VERSION = new Version("0.2.0"); 
+ public final static Version V0 = new Version("0.3.0"); + public final static Version V1 = new Version("1.0.0"); public final static String AIRBYTE_PROTOCOL_VERSION_MAX_KEY_NAME = "airbyte_protocol_version_max"; public final static String AIRBYTE_PROTOCOL_VERSION_MIN_KEY_NAME = "airbyte_protocol_version_min"; From c1d77366392c9004f30d06da68b035d205ddcb25 Mon Sep 17 00:00:00 2001 From: Eugene Date: Fri, 6 Jan 2023 23:26:40 +0200 Subject: [PATCH 05/11] =?UTF-8?q?=F0=9F=8E=89Updated=20normalization=20to?= =?UTF-8?q?=20handle=20new=20datatypes=20(#19721)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Updated normalization simple stream processing to handle new datatypes * Updated normalization nested stream processing to handle new datatypes * Updated normalization nested stream processing to handle new datatypes * Updated normalization drop_scd_catalog processing to handle new datatypes * Updated normalization ephemeral test processing to handle new datatypes * fixed more tests for normalization * fixed more tests for normalization * fixed more tests for normalization * fixed more tests for normalization * fixed more issues * fixed more issues (clickhouse) * fixed more issues * fixed more issues * fixed more issues * added binary type processing for some DBs * cleared commented code and moved some hardcodes to processing as macro * fixed codestyle and cleared commented code * minor refactor * minor refactor * minor refactor * fixed bool cast error * fixed dict->str cast error * fixed is_combining_node cast py check * removed commented code * removed commented code * committed autogenerated normalization_test_output files * committed autogenerated normalization_test_output files (new files) * refactored utils.py * Updated utils.py to use Callable functions and get rid of property_type in is_number and is_bool functions * committed autogenerated normalization_test_output files (new files) * fixed typo in 
TIMESTAMP_WITH_TIMEZONE_TYPE * updated stream_processor to handle string type first as a wider type * fixed arrays normalization by updating is_simple_property method as per new approaches * format Co-authored-by: Edward Gao --- .../macros/cross_db_utils/datatypes.sql | 30 +++ ..._columns_resulting_into_long_names_scd.sql | 4 +- .../test_normalization/exchange_rate.sql | 5 + .../dedup_exchange_rate_scd.sql | 4 +- .../test_normalization/exchange_rate.sql | 17 ++ .../dedup_exchange_rate_scd.sql | 4 +- .../test_normalization/exchange_rate.sql | 16 ++ .../test_normalization/exchange_rate.sql | 5 + .../dedup_exchange_rate_ab1.sql | 2 +- .../dedup_exchange_rate_ab2.sql | 10 +- .../dedup_cdc_excluded_scd.sql | 6 +- .../dedup_exchange_rate_scd.sql | 6 +- .../renamed_dedup_cdc_excluded_scd.sql | 6 +- .../dedup_exchange_rate.sql | 6 +- .../renamed_dedup_cdc_excluded.sql | 6 +- .../test_normalization/exchange_rate.sql | 5 +- .../dedup_exchange_rate_stg.sql | 2 +- .../multiple_column_names_conflicts_stg.sql | 2 +- .../dedup_exchange_rate_ab2.sql | 8 +- .../renamed_dedup_cdc_excluded_ab2.sql | 4 +- .../test_normalization/exchange_rate.sql | 17 ++ .../dedup_exchange_rate_ab2.sql | 8 +- .../dedup_exchange_rate_scd.sql | 7 +- .../renamed_dedup_cdc_excluded_scd.sql | 7 +- .../dedup_exchange_rate.sql | 7 +- .../renamed_dedup_cdc_excluded.sql | 7 +- .../test_normalization/exchange_rate.sql | 5 +- .../dedup_exchange_rate_stg.sql | 2 +- .../mssql/test_nested_streams/dbt_project.yml | 138 ++++++++++---- ...eam_with_co__lting_into_long_names_scd.sql | 4 +- .../models/generated/sources.yml | 1 + .../mssql/test_simple_streams/dbt_project.yml | 103 ++++++---- .../test_normalization/exchange_rate.sql | 4 + .../dedup_exchange_rate_scd.sql | 4 +- .../test_normalization/exchange_rate.sql | 17 ++ .../models/generated/sources.yml | 1 + .../test_normalization/exchange_rate.sql | 4 + ..._stream_with_co_1g_into_long_names_scd.sql | 4 +- .../test_normalization/exchange_rate.sql | 50 ++--- 
.../dedup_exchange_rate_scd.sql | 4 +- .../test_normalization/exchange_rate.sql | 17 ++ .../test_normalization/exchange_rate.sql | 50 ++--- .../test_normalization/exchange_rate.sql | 8 +- .../dedup_exchange_rate_scd.sql | 4 +- .../test_normalization/exchange_rate.sql | 17 ++ .../test_normalization/exchange_rate.sql | 8 +- ...ream_with_c__lting_into_long_names_scd.sql | 4 +- .../some_stream_that_was_empty_scd.sql | 4 +- .../test_normalization/arrays.sql | 16 ++ .../arrays_nested_array_parent.sql | 16 ++ .../conflict_stream_array.sql | 16 ++ .../conflict_stream_name.sql | 16 ++ ...ict_stream_name___conflict_stream_name.sql | 16 ++ ...flict_stream_name_conflict_stream_name.sql | 16 ++ .../conflict_stream_scalar.sql | 16 ++ ...ested_stream_wi__lting_into_long_names.sql | 16 ++ .../test_normalization/unnest_alias.sql | 16 ++ ...t_alias_childre__column___with__quotes.sql | 16 ++ .../unnest_alias_children.sql | 16 ++ .../unnest_alias_children_owner.sql | 16 ++ .../test_normalization/exchange_rate.sql | 5 +- .../test_normalization/exchange_rate_ab1.sql | 1 + .../test_normalization/exchange_rate_ab2.sql | 1 + .../test_normalization/exchange_rate_ab3.sql | 1 + .../1_prefix_startwith_number_scd.sql | 4 +- .../dedup_cdc_excluded_scd.sql | 4 +- .../dedup_exchange_rate_scd.sql | 4 +- .../multiple_column_names_conflicts_scd.sql | 4 +- .../test_normalization/pos_dedup_cdcx_scd.sql | 4 +- .../renamed_dedup_cdc_excluded_scd.sql | 4 +- .../test_normalization/types_testing_scd.sql | 4 +- .../test_normalization/exchange_rate.sql | 17 ++ .../dedup_cdc_excluded_scd.sql | 4 +- .../dedup_exchange_rate_scd.sql | 4 +- .../renamed_dedup_cdc_excluded_scd.sql | 4 +- .../test_normalization/exchange_rate.sql | 16 ++ .../test_normalization/exchange_rate.sql | 5 +- .../test_nested_streams/dbt_project.yml | 140 +++++++------- ..._columns_resulting_into_long_names_scd.sql | 15 ++ ..._columns_resulting_into_long_names_scd.sql | 6 +- ...plex_columns_resulting_into_long_names.sql | 29 --- 
...plex_columns_resulting_into_long_names.sql | 15 ++ ...ns_resulting_into_long_names_partition.sql | 9 + ...sulting_into_long_names_partition_data.sql | 9 + ...long_names_partition_double_array_data.sql | 9 + ...plex_columns_resulting_into_long_names.sql | 29 +++ ...ns_resulting_into_long_names_partition.sql | 8 +- ...sulting_into_long_names_partition_data.sql | 10 +- ...long_names_partition_double_array_data.sql | 10 +- ..._columns_resulting_into_long_names_ab1.sql | 6 +- ..._columns_resulting_into_long_names_ab2.sql | 2 +- ...esulting_into_long_names_partition_ab1.sql | 2 +- ...ing_into_long_names_partition_data_ab1.sql | 2 +- ..._names_partition_double_array_data_ab1.sql | 2 +- ..._columns_resulting_into_long_names_scd.sql | 12 +- ...plex_columns_resulting_into_long_names.sql | 4 +- ...ns_resulting_into_long_names_partition.sql | 2 +- ...sulting_into_long_names_partition_data.sql | 2 +- ...long_names_partition_double_array_data.sql | 2 +- .../models/generated/sources.yml | 16 +- ..._columns_resulting_into_long_names_scd.sql | 4 +- ..._columns_resulting_into_long_names_scd.sql | 15 ++ ...ns_resulting_into_long_names_partition.sql | 9 - ...sulting_into_long_names_partition_data.sql | 9 - ...long_names_partition_double_array_data.sql | 9 - ...plex_columns_resulting_into_long_names.sql | 4 +- ...ns_resulting_into_long_names_partition.sql | 9 + ...sulting_into_long_names_partition_data.sql | 9 + ...long_names_partition_double_array_data.sql | 9 + ...plex_columns_resulting_into_long_names.sql | 15 ++ ...ns_resulting_into_long_names_partition.sql | 9 + ...sulting_into_long_names_partition_data.sql | 9 + ...long_names_partition_double_array_data.sql | 9 + .../test_simple_streams/dbt_project.yml | 38 ++-- .../test_simple_streams/first_dbt_project.yml | 78 ++++---- .../dedup_exchange_rate_scd.sql | 6 +- .../dedup_exchange_rate_scd.sql | 15 ++ .../dedup_exchange_rate.sql | 8 +- .../dedup_exchange_rate.sql | 15 ++ .../exchange_rate.sql | 35 +++- .../exchange_rate.sql | 115 
++++++++++++ .../dedup_exchange_rate_stg.sql | 8 +- .../multiple_column_names_conflicts_stg.sql | 6 +- .../dedup_exchange_rate_stg.sql | 8 +- .../multiple_column_names_conflicts_stg.sql | 62 ++++++ .../dedup_exchange_rate_ab1.sql | 6 +- .../dedup_exchange_rate_ab2.sql | 2 +- .../dedup_exchange_rate_scd.sql | 12 +- .../dedup_exchange_rate.sql | 4 +- .../test_normalization/exchange_rate.sql | 26 --- .../exchange_rate.sql | 47 +++++ .../dedup_exchange_rate_stg.sql | 2 +- .../models/generated/sources.yml | 2 +- .../dedup_exchange_rate_ab1.sql | 6 +- .../dedup_exchange_rate_ab2.sql | 2 +- .../dedup_exchange_rate_scd.sql | 12 +- .../dedup_exchange_rate.sql | 4 +- .../test_normalization/exchange_rate.sql | 26 --- .../exchange_rate.sql | 42 +++++ .../dedup_exchange_rate_stg.sql | 2 +- .../modified_models/generated/sources.yml | 2 +- .../dedup_exchange_rate_scd.sql | 4 +- .../dedup_exchange_rate_scd.sql | 15 ++ .../dedup_exchange_rate.sql | 4 +- .../dedup_exchange_rate.sql | 15 ++ .../exchange_rate.sql | 115 ++++++++++++ .../exchange_rate.sql | 115 ++++++++++++ .../dedup_exchange_rate_stg.sql | 66 +++++++ .../dedup_exchange_rate_stg.sql | 66 +++++++ .../dedup_exchange_rate_scd.sql | 4 +- .../dedup_exchange_rate_scd.sql | 15 ++ .../dedup_exchange_rate.sql | 4 +- .../dedup_exchange_rate.sql | 15 ++ .../exchange_rate.sql | 10 +- .../exchange_rate.sql | 22 ++- .../dedup_exchange_rate_stg.sql | 8 +- .../dedup_exchange_rate_stg.sql | 68 +++++++ ..._COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql | 4 +- .../TEST_NORMALIZATION/EXCHANGE_RATE.sql | 5 + .../DEDUP_EXCHANGE_RATE_SCD.sql | 4 +- .../TEST_NORMALIZATION/EXCHANGE_RATE.sql | 17 ++ .../TEST_NORMALIZATION/EXCHANGE_RATE.sql | 5 + .../tidb/test_nested_streams/dbt_project.yml | 121 ++++++++++++ ..._stream_with_co_1g_into_long_names_scd.sql | 20 ++ ..._stream_with_co___long_names_partition.sql | 20 ++ ..._stream_with_co___names_partition_data.sql | 20 ++ ..._stream_with_co__ion_double_array_data.sql | 20 ++ 
..._stream_with_co__lting_into_long_names.sql | 20 ++ ..._stream_with_co_1g_into_long_names_ab1.sql | 19 ++ ..._stream_with_co_1g_into_long_names_ab2.sql | 19 ++ ..._stream_with_co_2g_names_partition_ab1.sql | 19 ++ ..._stream_with_co_3double_array_data_ab1.sql | 20 ++ ..._stream_with_co_3es_partition_data_ab1.sql | 20 ++ ..._stream_with_co_1g_into_long_names_scd.sql | 162 ++++++++++++++++ ..._stream_with_co___long_names_partition.sql | 19 ++ ..._stream_with_co___names_partition_data.sql | 18 ++ ..._stream_with_co__ion_double_array_data.sql | 18 ++ ..._stream_with_co__lting_into_long_names.sql | 22 +++ .../models/generated/sources.yml | 23 +++ ..._stream_with_co_1g_into_long_names_scd.sql | 7 + ..._stream_with_co___long_names_partition.sql | 7 + ..._stream_with_co___names_partition_data.sql | 7 + ..._stream_with_co__ion_double_array_data.sql | 7 + ..._stream_with_co__lting_into_long_names.sql | 7 + .../tidb/test_simple_streams/dbt_project.yml | 86 +++++++++ .../dedup_exchange_rate_scd.sql | 20 ++ .../dedup_exchange_rate.sql | 20 ++ .../test_normalization/exchange_rate.sql | 17 ++ .../dedup_exchange_rate_stg.sql | 112 +++++++++++ .../multiple_column_names_conflicts_stg.sql | 103 ++++++++++ .../dedup_exchange_rate_ab1.sql | 24 +++ .../dedup_exchange_rate_ab2.sql | 27 +++ .../dedup_exchange_rate_scd.sql | 176 ++++++++++++++++++ .../dedup_exchange_rate.sql | 27 +++ .../test_normalization/exchange_rate.sql | 46 +++++ .../dedup_exchange_rate_stg.sql | 24 +++ .../models/generated/sources.yml | 16 ++ .../dedup_exchange_rate_scd.sql | 7 + .../dedup_exchange_rate.sql | 7 + .../test_normalization/exchange_rate.sql | 17 ++ .../dedup_exchange_rate_stg.sql | 112 +++++++++++ .../destination_catalog.json | 14 +- .../messages1.txt | 8 +- .../messages2.txt | 8 +- .../data_input/catalog.json | 69 ++++--- .../data_input/messages.txt | 22 +-- .../data_input/messages_incremental.txt | 28 +-- .../data_input/test_drop_scd_catalog.json | 18 +- .../test_drop_scd_catalog_incremental.json | 
18 +- .../test_drop_scd_catalog_reset.json | 18 +- .../data_input/test_drop_scd_messages.txt | 10 +- .../test_scd_reset_messages_incremental.txt | 12 +- .../data_input/catalog.json | 115 ++++++------ .../data_input/catalog_schema_change.json | 58 +++--- .../data_input/messages.txt | 104 +++++------ .../data_input/messages_incremental.txt | 42 ++--- .../data_input/messages_schema_change.txt | 24 +-- .../types_testing_binary_values.sql | 14 ++ .../types_testing_binary_values.sql | 14 ++ .../types_testing_binary_values.sql | 14 ++ .../types_testing_binary_values.sql | 14 ++ .../types_testing_binary_values.sql | 14 ++ .../types_testing_binary_values.sql | 14 ++ .../types_testing_binary_values.sql | 14 ++ .../types_testing_binary_values.sql | 14 ++ .../integration_tests/test_ephemeral.py | 4 +- .../integration_tests/test_normalization.py | 79 +++++++- .../normalization/data_type.py | 20 ++ .../transform_catalog/stream_processor.py | 144 ++++++++++---- .../normalization/transform_catalog/utils.py | 101 +++++----- ...long_name_truncate_collisions_catalog.json | 6 +- .../unit_tests/resources/nested_catalog.json | 138 +++++++------- .../un-nesting_collisions_catalog.json | 10 +- .../unit_tests/test_stream_processor.py | 16 +- 234 files changed, 4175 insertions(+), 1043 deletions(-) create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/{test_normalization => test_normalization_iprwf}/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql (85%) delete mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names.sql rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/{test_normalization => test_normalization_iprwf}/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql (83%) rename 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/{test_normalization => test_normalization_iprwf}/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql (78%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/{test_normalization => test_normalization_iprwf}/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql (78%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/{test_normalization => test_normalization_iprwf}/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql (83%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/{test_normalization => test_normalization_iprwf}/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql (94%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/{test_normalization => test_normalization_iprwf}/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql (95%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/{test_normalization => test_normalization_iprwf}/nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1.sql (95%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/{test_normalization => 
test_normalization_iprwf}/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql (95%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/{test_normalization => test_normalization_iprwf}/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql (92%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/{test_normalization => test_normalization_iprwf}/nested_stream_with_complex_columns_resulting_into_long_names.sql (88%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/{test_normalization => test_normalization_iprwf}/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql (95%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/{test_normalization => test_normalization_iprwf}/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql (94%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/{test_normalization => test_normalization_iprwf}/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql (95%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/scd/{test_normalization => test_normalization_fxlbt}/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql (50%) create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/{test_normalization => test_normalization_fxlbt}/nested_stream_with_complex_columns_resulting_into_long_names.sql (50%) create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/{test_normalization => test_normalization_spffv}/dedup_exchange_rate_scd.sql (89%) create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization_vorny/dedup_exchange_rate_scd.sql rename 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/{test_normalization => test_normalization_spffv}/dedup_exchange_rate.sql (58%) create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/test_normalization_vorny/dedup_exchange_rate.sql rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/{second_output/airbyte_tables/test_normalization => first_output/airbyte_tables/test_normalization_spffv}/exchange_rate.sql (66%) create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization_vorny/exchange_rate.sql rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/{test_normalization => test_normalization_spffv}/dedup_exchange_rate_stg.sql (88%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/{test_normalization => test_normalization_spffv}/multiple_column_names_conflicts_stg.sql (87%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/{second_output/airbyte_views/test_normalization => first_output/airbyte_views/test_normalization_vorny}/dedup_exchange_rate_stg.sql (88%) create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization_vorny/multiple_column_names_conflicts_stg.sql rename 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/{test_normalization => test_normalization_spffv}/dedup_exchange_rate_ab1.sql (87%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/{test_normalization => test_normalization_spffv}/dedup_exchange_rate_ab2.sql (95%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/{test_normalization => test_normalization_spffv}/dedup_exchange_rate_scd.sql (92%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/{test_normalization => test_normalization_spffv}/dedup_exchange_rate.sql (87%) delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_tables/test_normalization_spffv/exchange_rate.sql rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/{test_normalization => test_normalization_spffv}/dedup_exchange_rate_stg.sql (93%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/{test_normalization => test_normalization_spffv}/dedup_exchange_rate_ab1.sql (87%) rename 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/{test_normalization => test_normalization_spffv}/dedup_exchange_rate_ab2.sql (95%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/{test_normalization => test_normalization_spffv}/dedup_exchange_rate_scd.sql (92%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/{test_normalization => test_normalization_spffv}/dedup_exchange_rate.sql (87%) delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization_spffv/exchange_rate.sql rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/{test_normalization => test_normalization_spffv}/dedup_exchange_rate_stg.sql (93%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/scd/{test_normalization => test_normalization_spffv}/dedup_exchange_rate_scd.sql (52%) create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization_vorny/dedup_exchange_rate_scd.sql rename 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/{test_normalization => test_normalization_spffv}/dedup_exchange_rate.sql (52%) create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/test_normalization_vorny/dedup_exchange_rate.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization_spffv/exchange_rate.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization_vorny/exchange_rate.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization_spffv/dedup_exchange_rate_stg.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization_vorny/dedup_exchange_rate_stg.sql rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/scd/{test_normalization => test_normalization_spffv}/dedup_exchange_rate_scd.sql (52%) create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization_vorny/dedup_exchange_rate_scd.sql rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/{test_normalization => 
test_normalization_spffv}/dedup_exchange_rate.sql (52%) create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/test_normalization_vorny/dedup_exchange_rate.sql rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/{test_normalization => test_normalization_spffv}/exchange_rate.sql (89%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/{first_output/airbyte_tables/test_normalization => third_output/airbyte_tables/test_normalization_vorny}/exchange_rate.sql (71%) rename airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/{test_normalization => test_normalization_spffv}/dedup_exchange_rate_stg.sql (88%) create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization_vorny/dedup_exchange_rate_stg.sql create mode 100755 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/dbt_project.yml create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_2g_names_partition_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3double_array_data_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3es_partition_data_ab1.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/sources.yml create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql create mode 100755 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/dbt_project.yml create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/sources.yml create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_bigquery_tmp/types_testing_binary_values.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_clickhouse_tmp/types_testing_binary_values.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_mssql_tmp/types_testing_binary_values.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_mysql_tidb_tmp/types_testing_binary_values.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_oracle_tmp/types_testing_binary_values.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_postgres_tmp/types_testing_binary_values.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_redshift_tmp/types_testing_binary_values.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_snowflake_tmp/types_testing_binary_values.sql create mode 100644 airbyte-integrations/bases/base-normalization/normalization/data_type.py diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql index 42f5312b054f7..f868727464bd6 100755 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql @@ -8,6 +8,14 @@ string {% endmacro %} +{%- macro type_binary() -%} + {{ adapter.dispatch('type_binary')() }} +{%- endmacro -%} + +{%- macro default__type_binary() -%} + binary +{%- endmacro -%} + {%- macro redshift__type_json() -%} {%- if redshift_super_type() -%} super @@ -72,6 +80,28 @@ char(1000) {%- endmacro -%} +{# binary data ------------------------------------------------- #} + +{%- macro postgres__type_binary() -%} + bytea +{%- endmacro -%} + +{%- macro bigquery__type_binary() -%} + bytes +{%- endmacro -%} + +{%- macro mssql__type_binary() -%} + VARBINARY(MAX) +{%- endmacro -%} + +{%- macro snowflake__type_binary() -%} + VARBINARY +{%- endmacro -%} + +{%- macro clickhouse__type_binary() -%} + VARBINARY +{%- endmacro -%} + {# float ------------------------------------------------- #} {% macro mysql__type_float() %} float diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index 1df163184ca05..a94acefbf5cc7 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -34,12 +34,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql index 3d32bbb2838a9..9309b066c3a94 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -24,6 +24,7 @@ select json_extract_scalar(_airbyte_data, "$['datetime_no_tz']") as datetime_no_tz, json_extract_scalar(_airbyte_data, "$['time_tz']") as time_tz, json_extract_scalar(_airbyte_data, "$['time_no_tz']") as time_no_tz, + json_extract_scalar(_airbyte_data, "$['property_binary_data']") as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, CURRENT_TIMESTAMP() as _airbyte_normalized_at @@ -74,6 +75,7 @@ select cast(nullif(time_no_tz, '') as time ) as time_no_tz, + cast(FROM_BASE64(property_binary_data) as bytes) as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, CURRENT_TIMESTAMP() as _airbyte_normalized_at @@ -111,6 +113,8 @@ select string ), ''), '-', coalesce(cast(time_no_tz as string +), ''), '-', coalesce(cast(property_binary_data as + string ), '')) as string ))) as _airbyte_exchange_rate_hashid, @@ -134,6 +138,7 @@ select datetime_no_tz, time_tz, time_no_tz, + property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, CURRENT_TIMESTAMP() as 
_airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index ce21bef8c7221..2d33e1331c8fe 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -34,12 +34,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('dedup_exchange_rate')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('dedup_exchange_rate')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql index 61b42d20863c3..0cb9f41b4faf3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -3,6 +3,22 @@ partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, unique_key = '_airbyte_ab_id', schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='exchange_rate_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "top-level" ] ) }} -- Final base SQL model @@ -21,6 +37,7 @@ select datetime_no_tz, time_tz, time_no_tz, + property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 4f6b80934992c..6dca5a736c9ae 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -34,12 +34,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('dedup_exchange_rate')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('dedup_exchange_rate')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql index 84cb4985e8c95..b9817a4de3a01 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -3,6 +3,22 @@ partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, unique_key = '_airbyte_ab_id', schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='exchange_rate_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "top-level" ] ) }} -- Final base SQL model diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql index 3d32bbb2838a9..9309b066c3a94 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -24,6 +24,7 @@ select json_extract_scalar(_airbyte_data, "$['datetime_no_tz']") as datetime_no_tz, json_extract_scalar(_airbyte_data, "$['time_tz']") as time_tz, json_extract_scalar(_airbyte_data, "$['time_no_tz']") as time_no_tz, + json_extract_scalar(_airbyte_data, "$['property_binary_data']") as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, CURRENT_TIMESTAMP() as _airbyte_normalized_at @@ -74,6 +75,7 @@ select cast(nullif(time_no_tz, '') as time ) as time_no_tz, + cast(FROM_BASE64(property_binary_data) as bytes) as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, CURRENT_TIMESTAMP() as _airbyte_normalized_at @@ -111,6 +113,8 @@ select string ), ''), '-', coalesce(cast(time_no_tz as string +), ''), '-', coalesce(cast(property_binary_data as + string ), '')) as string ))) as _airbyte_exchange_rate_hashid, @@ -134,6 +138,7 @@ select datetime_no_tz, time_tz, time_no_tz, + property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, CURRENT_TIMESTAMP() as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql index 2609c12f32d36..1cee42ac010eb 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -1,6 +1,6 @@ - create view _airbyte_test_normalization.dedup_exchange_rate_ab1__dbt_tmp + create view _airbyte_test_normalization.dedup_exchange_rate_ab1 as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql index 07778080d6faa..1a5e6ad10142b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -1,26 +1,26 @@ - create view _airbyte_test_normalization.dedup_exchange_rate_ab2__dbt_tmp + create view _airbyte_test_normalization.dedup_exchange_rate_ab2 as ( -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -- depends_on: _airbyte_test_normalization.dedup_exchange_rate_ab1 select - accurateCastOrNull(id, ' + accurateCastOrNull(trim(BOTH '"' from id), ' BIGINT ') as id, nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, toDate(parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, '')))) as date, 
parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, - accurateCastOrNull("HKD@spéçiäl & characters", ' + accurateCastOrNull(trim(BOTH '"' from "HKD@spéçiäl & characters"), ' Float64 ') as "HKD@spéçiäl & characters", nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, - accurateCastOrNull(NZD, ' + accurateCastOrNull(trim(BOTH '"' from NZD), ' Float64 ') as NZD, - accurateCastOrNull(USD, ' + accurateCastOrNull(trim(BOTH '"' from USD), ' Float64 ') as USD, _airbyte_ab_id, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 7dac7b7d793f6..654648c600c62 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -1,5 +1,7 @@ - + + + insert into test_normalization.dedup_cdc_excluded_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid") @@ -101,4 +103,4 @@ select _airbyte_dedup_cdc_excluded_hashid from dedup_data where _airbyte_row_num = 1 - \ No newline at end of file + \ No newline at end 
of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index a3527b053dc31..57682bc247c32 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -1,5 +1,7 @@ - + + + insert into test_normalization.dedup_exchange_rate_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") @@ -105,4 +107,4 @@ select _airbyte_dedup_exchange_rate_hashid from dedup_data where _airbyte_row_num = 1 - \ No newline at end of file + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index cf48610f8b82c..f56e43bdf65ba 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -1,5 +1,7 @@ - + + + insert into test_normalization.renamed_dedup_cdc_excluded_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") @@ -87,4 +89,4 @@ select _airbyte_renamed_dedup_cdc_excluded_hashid from dedup_data where _airbyte_row_num = 1 - \ No newline at end of file + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql index 11d81fef34b9b..e2340bdfe303c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -1,5 +1,7 @@ - + + + insert into test_normalization.dedup_exchange_rate ("_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", 
"NZD", "USD", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") @@ -26,4 +28,4 @@ where 1 = 1 and _airbyte_active_row = 1 - \ No newline at end of file + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index b237171bc7fe8..daf0b99595687 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -1,5 +1,7 @@ - + + + insert into test_normalization.renamed_dedup_cdc_excluded ("_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") @@ -20,4 +22,4 @@ where 1 = 1 and _airbyte_active_row = 1 - \ No newline at end of file + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql index c2be71e63fc94..46e79d1d45cb2 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -1,7 +1,9 @@ + + - insert into test_normalization.exchange_rate__dbt_tmp ("id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "column___with__quotes", "datetime_tz", "datetime_no_tz", "time_tz", "time_no_tz", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_exchange_rate_hashid") + insert into test_normalization.exchange_rate ("id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "column___with__quotes", "datetime_tz", "datetime_no_tz", "time_tz", "time_no_tz", "property_binary_data", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_exchange_rate_hashid") -- Final base SQL model -- depends_on: _airbyte_test_normalization.exchange_rate_ab3 @@ -19,6 +21,7 @@ select datetime_no_tz, time_tz, time_no_tz, + property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql index 9a932053975b7..98084e148f4a5 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -1,6 +1,6 @@ - create view _airbyte_test_normalization.dedup_exchange_rate_stg__dbt_tmp + create view _airbyte_test_normalization.dedup_exchange_rate_stg as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql index 5f10629995793..57a8d9b9532d6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql @@ -1,6 +1,6 @@ - create view _airbyte_test_normalization.multiple_column_names_conflicts_stg__dbt_tmp + create view _airbyte_test_normalization.multiple_column_names_conflicts_stg as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql index 22f82153a5cd8..fb6b3673aaa28 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -6,14 +6,14 @@ -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -- depends_on: {{ ref('dedup_exchange_rate_ab1') }} select - accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, + accurateCastOrNull(trim(BOTH '"' from id), '{{ dbt_utils.type_bigint() }}') as id, nullif(accurateCastOrNull(trim(BOTH '"' from currency), '{{ dbt_utils.type_string() }}'), 'null') as currency, toDate(parseDateTimeBestEffortOrNull(trim(BOTH '"' from {{ empty_string_to_null('date') }}))) as date, parseDateTime64BestEffortOrNull(trim(BOTH '"' from {{ empty_string_to_null('timestamp_col') }})) as timestamp_col, - accurateCastOrNull({{ quote('HKD@spéçiäl & characters') }}, '{{ dbt_utils.type_float() }}') as {{ quote('HKD@spéçiäl & characters') }}, + accurateCastOrNull(trim(BOTH '"' from {{ quote('HKD@spéçiäl & characters') }}), '{{ dbt_utils.type_float() }}') as {{ quote('HKD@spéçiäl & characters') }}, nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), '{{ dbt_utils.type_string() }}'), 'null') as HKD_special___characters, - accurateCastOrNull(NZD, '{{ dbt_utils.type_float() }}') as NZD, - accurateCastOrNull(USD, '{{ dbt_utils.type_float() }}') as USD, + accurateCastOrNull(trim(BOTH '"' from NZD), '{{ dbt_utils.type_float() }}') as NZD, + accurateCastOrNull(trim(BOTH '"' from 
USD), '{{ dbt_utils.type_float() }}') as USD, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql index c6885e98962eb..211ba2da4a466 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql @@ -6,8 +6,8 @@ -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab1') }} select - accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, - accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_float() }}') as _ab_cdc_updated_at, + accurateCastOrNull(trim(BOTH '"' from id), '{{ dbt_utils.type_bigint() }}') as id, + accurateCastOrNull(trim(BOTH '"' from _ab_cdc_updated_at), '{{ dbt_utils.type_float() }}') as _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql index c66443b3a1501..1bd94c36e50fe 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -1,6 +1,22 @@ {{ config( unique_key = '_airbyte_ab_id', schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='exchange_rate_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "top-level" ] ) }} -- Final base SQL model @@ -19,6 +35,7 @@ select datetime_no_tz, time_tz, time_no_tz, + property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql index 07778080d6faa..5bad46684b291 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -7,20 +7,20 @@ -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -- depends_on: _airbyte_test_normalization.dedup_exchange_rate_ab1 select - accurateCastOrNull(id, ' + accurateCastOrNull(trim(BOTH '"' from id), ' BIGINT ') as id, nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, toDate(parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, '')))) as date, parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, - accurateCastOrNull("HKD@spéçiäl & characters", ' + accurateCastOrNull(trim(BOTH '"' from "HKD@spéçiäl & characters"), ' Float64 ') as "HKD@spéçiäl & characters", nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, - accurateCastOrNull(NZD, ' + accurateCastOrNull(trim(BOTH '"' from NZD), ' Float64 ') as NZD, - accurateCastOrNull(USD, ' + accurateCastOrNull(trim(BOTH '"' from USD), ' Float64 ') as USD, _airbyte_ab_id, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index a793d7412e483..2d4ac4e196eaf 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -1,6 +1,3 @@ - insert into test_normalization.dedup_exchange_rate_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" - from dedup_exchange_rate_scd__dbt_tmp - - \ No newline at end of file + create table test_normalization.dedup_exchange_rate_scd__dbt_tmp as test_normalization.dedup_exchange_rate_scd__dbt_new_data + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 8f84c4f3c1620..59a6053de99cf 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -1,6 +1,3 @@ - insert into test_normalization.renamed_dedup_cdc_excluded_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" - from renamed_dedup_cdc_excluded_scd__dbt_tmp - - \ No newline at end of file + create table test_normalization.renamed_dedup_cdc_excluded_scd__dbt_tmp as test_normalization.renamed_dedup_cdc_excluded_scd__dbt_new_data + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql index 4a895d6cf480a..e34e478163418 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -1,6 +1,3 @@ - insert into test_normalization.dedup_exchange_rate 
("_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") - select "_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" - from dedup_exchange_rate__dbt_tmp - - \ No newline at end of file + create table test_normalization.dedup_exchange_rate__dbt_tmp as test_normalization.dedup_exchange_rate__dbt_new_data + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index 1b96d3f87152e..5d6352d6aa6dd 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -1,6 +1,3 @@ - insert into test_normalization.renamed_dedup_cdc_excluded ("_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") - select "_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", 
"_airbyte_renamed_dedup_cdc_excluded_hashid" - from renamed_dedup_cdc_excluded__dbt_tmp - - \ No newline at end of file + create table test_normalization.renamed_dedup_cdc_excluded__dbt_tmp as test_normalization.renamed_dedup_cdc_excluded__dbt_new_data + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql index c2be71e63fc94..a8075c3f95bbc 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -1,7 +1,9 @@ + + - insert into test_normalization.exchange_rate__dbt_tmp ("id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "column___with__quotes", "datetime_tz", "datetime_no_tz", "time_tz", "time_no_tz", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_exchange_rate_hashid") + insert into test_normalization.exchange_rate__dbt_backup ("id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "column___with__quotes", "datetime_tz", "datetime_no_tz", "time_tz", "time_no_tz", "property_binary_data", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_exchange_rate_hashid") -- Final base SQL model -- depends_on: _airbyte_test_normalization.exchange_rate_ab3 @@ -19,6 +21,7 @@ select datetime_no_tz, time_tz, time_no_tz, 
+ property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql index 9a932053975b7..98084e148f4a5 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -1,6 +1,6 @@ - create view _airbyte_test_normalization.dedup_exchange_rate_stg__dbt_tmp + create view _airbyte_test_normalization.dedup_exchange_rate_stg as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/dbt_project.yml index 8ed082f367749..9e5be22d283bb 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/dbt_project.yml @@ -1,45 +1,29 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. 
A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" -version: "1.0" +name: airbyte_utils +version: '1.0' config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! -model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - +profile: normalize +model-paths: +- models +docs-paths: +- docs +analysis-paths: +- analysis +test-paths: +- tests +seed-paths: +- data +macro-paths: +- macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: +- build +- dbt_modules quoting: database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted schema: false identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! 
models: airbyte_utils: +materialized: table @@ -56,6 +40,82 @@ models: airbyte_views: +tags: airbyte_internal_views +materialized: view - vars: - dbt_utils_dispatch_list: ["airbyte_utils"] + dbt_utils_dispatch_list: + - airbyte_utils + json_column: _airbyte_data + models_to_source: + nested_stream_with_co__lting_into_long_names_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_co__lting_into_long_names_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_co__lting_into_long_names_stg: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_co__lting_into_long_names_scd: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_co__lting_into_long_names: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + non_nested_stream_wit__lting_into_long_names_ab1: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_wit__lting_into_long_names_ab2: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_wit__lting_into_long_names_ab3: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_wit__lting_into_long_names: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + some_stream_that_was_empty_ab1: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_ab2: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_stg: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_scd: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty: 
test_normalization._airbyte_raw_some_stream_that_was_empty + simple_stream_with_na__lting_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_na__lting_into_long_names_ab2: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_na__lting_into_long_names_ab3: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_na__lting_into_long_names: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_scalar_ab1: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab2: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab3: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_array_ab1: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array_ab2: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array_ab3: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array: test_normalization._airbyte_raw_conflict_stream_array + unnest_alias_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias: test_normalization._airbyte_raw_unnest_alias + arrays_ab1: test_normalization._airbyte_raw_arrays + arrays_ab2: test_normalization._airbyte_raw_arrays + 
arrays_ab3: test_normalization._airbyte_raw_arrays + arrays: test_normalization._airbyte_raw_arrays + nested_stream_with_co___long_names_partition_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_co___long_names_partition_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_co___long_names_partition_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_co___long_names_partition: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + conflict_stream_name_conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + unnest_alias_children_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children: test_normalization._airbyte_raw_unnest_alias + arrays_nested_array_parent_ab1: test_normalization._airbyte_raw_arrays + arrays_nested_array_parent_ab2: test_normalization._airbyte_raw_arrays + arrays_nested_array_parent_ab3: test_normalization._airbyte_raw_arrays + arrays_nested_array_parent: test_normalization._airbyte_raw_arrays + nested_stream_with_co__ion_double_array_data_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_co__ion_double_array_data_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + 
nested_stream_with_co__ion_double_array_data_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_co__ion_double_array_data: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_co___names_partition_data_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_co___names_partition_data_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_co___names_partition_data_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_co___names_partition_data: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + conflict_stream_name____conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name____conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name____conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name____conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + unnest_alias_children_owner_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children__column___with__quotes_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children__column___with__quotes_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children__column___with__quotes_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children__column___with__quotes: test_normalization._airbyte_raw_unnest_alias diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql index b4683a3ea301c..33fe9d4856b1a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql @@ -32,12 +32,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('nested_stream_with_co__lting_into_long_names')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/sources.yml index 92fa4c9a2580e..29bae1b4b5105 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/sources.yml @@ -6,6 +6,7 @@ sources: schema: false identifier: false tables: + - name: _airbyte_raw_arrays - name: _airbyte_raw_conflict_stream_array - name: _airbyte_raw_conflict_stream_name - name: _airbyte_raw_conflict_stream_scalar diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/dbt_project.yml index 8ed082f367749..70f5208e5e5da 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/dbt_project.yml @@ -1,45 +1,29 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. 
A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" -version: "1.0" +name: airbyte_utils +version: '1.0' config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! -model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "/dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - +profile: normalize +model-paths: +- models +docs-paths: +- docs +analysis-paths: +- analysis +test-paths: +- tests +seed-paths: +- data +macro-paths: +- macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: +- build +- dbt_modules quoting: database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted schema: false identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! 
models: airbyte_utils: +materialized: table @@ -56,6 +40,47 @@ models: airbyte_views: +tags: airbyte_internal_views +materialized: view - vars: - dbt_utils_dispatch_list: ["airbyte_utils"] + dbt_utils_dispatch_list: + - airbyte_utils + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate + exchange_rate: test_normalization._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded + pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx + 
pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx + _1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number + _1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number + _1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number + _1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number + _1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number + multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts + types_testing_ab1: test_normalization._airbyte_raw_types_testing + types_testing_ab2: test_normalization._airbyte_raw_types_testing + types_testing_stg: test_normalization._airbyte_raw_types_testing + types_testing_scd: test_normalization._airbyte_raw_types_testing + types_testing: test_normalization._airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql index 830e76c6f0ef8..0532716f3b180 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -37,6 +37,7 @@ select json_value(_airbyte_data, ''$."datetime_no_tz"'') as datetime_no_tz, json_value(_airbyte_data, ''$."time_tz"'') as time_tz, json_value(_airbyte_data, ''$."time_no_tz"'') as time_no_tz, + json_value(_airbyte_data, ''$."property_binary_data"'') as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, SYSDATETIME() as _airbyte_normalized_at @@ -74,6 +75,7 @@ select cast(nullif(time_no_tz, '''') as time ) as time_no_tz, + CAST(property_binary_data as XML ).value(''.'',''binary'') as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, SYSDATETIME() as _airbyte_normalized_at @@ -102,6 +104,7 @@ select NVARCHAR(max)), ''''), ''-'', coalesce(cast(datetime_no_tz as NVARCHAR(max)), ''''), ''-'', coalesce(cast(time_tz as NVARCHAR(max)), ''''), ''-'', coalesce(cast(time_no_tz as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(property_binary_data as NVARCHAR(max)), ''''),''''), '''') as NVARCHAR(max)), '''')), 2) as _airbyte_exchange_rate_hashid, tmp.* @@ -124,6 +127,7 @@ select datetime_no_tz, time_tz, time_no_tz, + property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, SYSDATETIME() as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 8d96481142613..835580aa0a069 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -32,12 +32,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('dedup_exchange_rate')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('dedup_exchange_rate')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql index 8a74de4c15332..1446314e40e67 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -1,6 +1,22 @@ {{ config( unique_key = '_airbyte_ab_id', schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='exchange_rate_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "top-level" ] ) }} -- Final base SQL model @@ -19,6 +35,7 @@ select datetime_no_tz, time_tz, time_no_tz, + property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/sources.yml index 97bf0d05cbd40..f51802427655e 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/sources.yml @@ -13,3 +13,4 @@ sources: - name: _airbyte_raw_multiple_column_names_conflicts - name: _airbyte_raw_pos_dedup_cdcx - name: _airbyte_raw_renamed_dedup_cdc_excluded + - name: _airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql index 830e76c6f0ef8..0532716f3b180 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -37,6 +37,7 @@ select json_value(_airbyte_data, ''$."datetime_no_tz"'') as datetime_no_tz, json_value(_airbyte_data, ''$."time_tz"'') as time_tz, json_value(_airbyte_data, ''$."time_no_tz"'') as time_no_tz, + json_value(_airbyte_data, ''$."property_binary_data"'') as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, SYSDATETIME() as _airbyte_normalized_at @@ -74,6 +75,7 @@ select cast(nullif(time_no_tz, '''') as time ) as time_no_tz, + CAST(property_binary_data as XML ).value(''.'',''binary'') as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, SYSDATETIME() as _airbyte_normalized_at @@ -102,6 +104,7 @@ select NVARCHAR(max)), ''''), ''-'', coalesce(cast(datetime_no_tz as 
NVARCHAR(max)), ''''), ''-'', coalesce(cast(time_tz as NVARCHAR(max)), ''''), ''-'', coalesce(cast(time_no_tz as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(property_binary_data as NVARCHAR(max)), ''''),''''), '''') as NVARCHAR(max)), '''')), 2) as _airbyte_exchange_rate_hashid, tmp.* @@ -124,6 +127,7 @@ select datetime_no_tz, time_tz, time_no_tz, + property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, SYSDATETIME() as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index 9ffb6bd5558cc..f4b9f7bb2f074 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -32,12 +32,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('nested_stream_with_co__lting_into_long_names')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql index 540fc0e7911f6..200429b86c977 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -3,41 +3,43 @@ create table test_normalization.`exchange_rate__dbt_tmp` as ( - + with __dbt__cte__exchange_rate_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -- depends_on: test_normalization._airbyte_raw_exchange_rate select - json_value(_airbyte_data, + json_value(_airbyte_data, '$."id"' RETURNING CHAR) as id, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."currency"' RETURNING CHAR) as currency, - json_value(_airbyte_data, + 
json_value(_airbyte_data, '$."date"' RETURNING CHAR) as `date`, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."timestamp_col"' RETURNING CHAR) as timestamp_col, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."HKD@spéçiäl & characters"' RETURNING CHAR) as `HKD@spéçiäl & characters`, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."HKD_special___characters"' RETURNING CHAR) as hkd_special___characters, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."NZD"' RETURNING CHAR) as nzd, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."USD"' RETURNING CHAR) as usd, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."column___with__quotes"' RETURNING CHAR) as `column__'with"_quotes`, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."datetime_tz"' RETURNING CHAR) as datetime_tz, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."datetime_no_tz"' RETURNING CHAR) as datetime_no_tz, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."time_tz"' RETURNING CHAR) as time_tz, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."time_no_tz"' RETURNING CHAR) as time_no_tz, + json_value(_airbyte_data, + '$."property_binary_data"' RETURNING CHAR) as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, - + CURRENT_TIMESTAMP as _airbyte_normalized_at from test_normalization._airbyte_raw_exchange_rate as table_alias @@ -48,7 +50,7 @@ where 1 = 1 -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -- depends_on: __dbt__cte__exchange_rate_ab1 select - cast(id as + cast(id as signed ) as id, cast(currency as char(1024)) as currency, @@ -57,14 +59,14 @@ select end as `date` , cast(nullif(timestamp_col, '') as char(1024)) as timestamp_col, - cast(`HKD@spéçiäl & characters` as + cast(`HKD@spéçiäl & characters` as float ) as `HKD@spéçiäl & characters`, cast(hkd_special___characters as char(1024)) as hkd_special___characters, - cast(nzd as + cast(nzd as 
float ) as nzd, - cast(usd as + cast(usd as float ) as usd, cast(`column__'with"_quotes` as char(1024)) as `column__'with"_quotes`, @@ -74,12 +76,13 @@ select end as datetime_no_tz , nullif(cast(time_tz as char(1024)), "") as time_tz, - nullif(cast(time_no_tz as + nullif(cast(time_no_tz as time ), "") as time_no_tz, + cast(FROM_BASE64(property_binary_data) as binary) as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, - + CURRENT_TIMESTAMP as _airbyte_normalized_at from __dbt__cte__exchange_rate_ab1 @@ -90,7 +93,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__exchange_rate_ab2 select - md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(`date` as char), ''), '-', coalesce(cast(timestamp_col as char), ''), '-', coalesce(cast(`HKD@spéçiäl & characters` as char), ''), '-', coalesce(cast(hkd_special___characters as char), ''), '-', coalesce(cast(nzd as char), ''), '-', coalesce(cast(usd as char), ''), '-', coalesce(cast(`column__'with"_quotes` as char), ''), '-', coalesce(cast(datetime_tz as char), ''), '-', coalesce(cast(datetime_no_tz as char), ''), '-', coalesce(cast(time_tz as char), ''), '-', coalesce(cast(time_no_tz as char), '')) as char)) as _airbyte_exchange_rate_hashid, + md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(`date` as char), ''), '-', coalesce(cast(timestamp_col as char), ''), '-', coalesce(cast(`HKD@spéçiäl & characters` as char), ''), '-', coalesce(cast(hkd_special___characters as char), ''), '-', coalesce(cast(nzd as char), ''), '-', coalesce(cast(usd as char), ''), '-', coalesce(cast(`column__'with"_quotes` as char), ''), '-', coalesce(cast(datetime_tz as char), ''), '-', coalesce(cast(datetime_no_tz as char), ''), '-', coalesce(cast(time_tz as char), ''), '-', coalesce(cast(time_no_tz as char), ''), '-', coalesce(cast(property_binary_data as char), 
'')) as char)) as _airbyte_exchange_rate_hashid, tmp.* from __dbt__cte__exchange_rate_ab2 tmp -- exchange_rate @@ -111,9 +114,10 @@ select datetime_no_tz, time_tz, time_no_tz, + property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, - + CURRENT_TIMESTAMP as _airbyte_normalized_at, _airbyte_exchange_rate_hashid diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index b1c2af62e4bf1..499ae700143f8 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -32,12 +32,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('dedup_exchange_rate')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('dedup_exchange_rate')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql index 3fe3205727b89..bc56f1c5a3cc9 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -1,6 +1,22 @@ {{ config( unique_key = '_airbyte_ab_id', schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='exchange_rate_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "top-level" ] ) }} -- Final base SQL model @@ -19,6 +35,7 @@ select datetime_no_tz, time_tz, time_no_tz, + property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql index 540fc0e7911f6..200429b86c977 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -3,41 +3,43 @@ create table test_normalization.`exchange_rate__dbt_tmp` as ( - + with __dbt__cte__exchange_rate_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -- depends_on: test_normalization._airbyte_raw_exchange_rate select - json_value(_airbyte_data, + json_value(_airbyte_data, '$."id"' RETURNING CHAR) as id, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."currency"' RETURNING CHAR) as currency, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."date"' RETURNING CHAR) as `date`, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."timestamp_col"' RETURNING CHAR) as timestamp_col, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."HKD@spéçiäl & characters"' RETURNING CHAR) as `HKD@spéçiäl & characters`, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."HKD_special___characters"' RETURNING CHAR) as hkd_special___characters, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."NZD"' RETURNING CHAR) as nzd, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."USD"' RETURNING CHAR) as usd, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."column___with__quotes"' RETURNING CHAR) as `column__'with"_quotes`, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."datetime_tz"' RETURNING CHAR) as datetime_tz, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."datetime_no_tz"' RETURNING CHAR) as datetime_no_tz, - json_value(_airbyte_data, + json_value(_airbyte_data, '$."time_tz"' RETURNING CHAR) as time_tz, - 
json_value(_airbyte_data, + json_value(_airbyte_data, '$."time_no_tz"' RETURNING CHAR) as time_no_tz, + json_value(_airbyte_data, + '$."property_binary_data"' RETURNING CHAR) as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, - + CURRENT_TIMESTAMP as _airbyte_normalized_at from test_normalization._airbyte_raw_exchange_rate as table_alias @@ -48,7 +50,7 @@ where 1 = 1 -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -- depends_on: __dbt__cte__exchange_rate_ab1 select - cast(id as + cast(id as signed ) as id, cast(currency as char(1024)) as currency, @@ -57,14 +59,14 @@ select end as `date` , cast(nullif(timestamp_col, '') as char(1024)) as timestamp_col, - cast(`HKD@spéçiäl & characters` as + cast(`HKD@spéçiäl & characters` as float ) as `HKD@spéçiäl & characters`, cast(hkd_special___characters as char(1024)) as hkd_special___characters, - cast(nzd as + cast(nzd as float ) as nzd, - cast(usd as + cast(usd as float ) as usd, cast(`column__'with"_quotes` as char(1024)) as `column__'with"_quotes`, @@ -74,12 +76,13 @@ select end as datetime_no_tz , nullif(cast(time_tz as char(1024)), "") as time_tz, - nullif(cast(time_no_tz as + nullif(cast(time_no_tz as time ), "") as time_no_tz, + cast(FROM_BASE64(property_binary_data) as binary) as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, - + CURRENT_TIMESTAMP as _airbyte_normalized_at from __dbt__cte__exchange_rate_ab1 @@ -90,7 +93,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__exchange_rate_ab2 select - md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(`date` as char), ''), '-', coalesce(cast(timestamp_col as char), ''), '-', coalesce(cast(`HKD@spéçiäl & characters` as char), ''), '-', coalesce(cast(hkd_special___characters as char), ''), '-', coalesce(cast(nzd as char), ''), '-', coalesce(cast(usd as char), ''), '-', 
coalesce(cast(`column__'with"_quotes` as char), ''), '-', coalesce(cast(datetime_tz as char), ''), '-', coalesce(cast(datetime_no_tz as char), ''), '-', coalesce(cast(time_tz as char), ''), '-', coalesce(cast(time_no_tz as char), '')) as char)) as _airbyte_exchange_rate_hashid, + md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(`date` as char), ''), '-', coalesce(cast(timestamp_col as char), ''), '-', coalesce(cast(`HKD@spéçiäl & characters` as char), ''), '-', coalesce(cast(hkd_special___characters as char), ''), '-', coalesce(cast(nzd as char), ''), '-', coalesce(cast(usd as char), ''), '-', coalesce(cast(`column__'with"_quotes` as char), ''), '-', coalesce(cast(datetime_tz as char), ''), '-', coalesce(cast(datetime_no_tz as char), ''), '-', coalesce(cast(time_tz as char), ''), '-', coalesce(cast(time_no_tz as char), ''), '-', coalesce(cast(property_binary_data as char), '')) as char)) as _airbyte_exchange_rate_hashid, tmp.* from __dbt__cte__exchange_rate_ab2 tmp -- exchange_rate @@ -111,9 +114,10 @@ select datetime_no_tz, time_tz, time_no_tz, + property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, - + CURRENT_TIMESTAMP as _airbyte_normalized_at, _airbyte_exchange_rate_hashid diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql index 4292befa848b8..ae518b148c97d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -22,6 +22,7 @@ select json_value("_AIRBYTE_DATA", '$."datetime_no_tz"') as datetime_no_tz, json_value("_AIRBYTE_DATA", '$."time_tz"') as time_tz, json_value("_AIRBYTE_DATA", '$."time_no_tz"') as time_no_tz, + json_value("_AIRBYTE_DATA", '$."property_binary_data"') as property_binary_data, "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", @@ -68,6 +69,7 @@ select cast(nullif(time_no_tz, '') as varchar2(4000) ) as time_no_tz, + cast(property_binary_data as varchar2(4000)) as property_binary_data, "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", @@ -119,7 +121,10 @@ select time_tz || '~' || - time_no_tz + time_no_tz || '~' || + + + property_binary_data ) as "_AIRBYTE_EXCHANGE_RATE_HASHID", tmp.* @@ -142,6 +147,7 @@ select datetime_no_tz, time_tz, time_no_tz, + property_binary_data, "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 9320dbc51f60f..2cc63b91fa44a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -32,12 +32,12 @@ from ( select distinct {{ quote('_AIRBYTE_UNIQUE_KEY') }} as unique_key from {{ this }} - 
where 1=1 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), this.schema + '.' + quote('dedup_exchange_rate')) }} + where 1=1 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), quote(this.schema) + '.' + quote('dedup_exchange_rate')) }} ) recent_records left join ( select {{ quote('_AIRBYTE_UNIQUE_KEY') }} as unique_key, count({{ quote('_AIRBYTE_UNIQUE_KEY') }}) as active_count from {{ this }} - where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), this.schema + '.' + quote('dedup_exchange_rate')) }} + where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), quote(this.schema) + '.' + quote('dedup_exchange_rate')) }} group by {{ quote('_AIRBYTE_UNIQUE_KEY') }} ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql index 2fa5061764670..d1c13cf50a649 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -1,6 +1,22 @@ {{ config( unique_key = quote('_AIRBYTE_AB_ID'), schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='exchange_rate_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do 
adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "top-level" ] ) }} -- Final base SQL model @@ -19,6 +35,7 @@ select datetime_no_tz, time_tz, time_no_tz, + property_binary_data, {{ quote('_AIRBYTE_AB_ID') }}, {{ quote('_AIRBYTE_EMITTED_AT') }}, {{ current_timestamp() }} as {{ quote('_AIRBYTE_NORMALIZED_AT') }}, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql index 4292befa848b8..ae518b148c97d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -22,6 +22,7 @@ select json_value("_AIRBYTE_DATA", '$."datetime_no_tz"') as datetime_no_tz, json_value("_AIRBYTE_DATA", '$."time_tz"') as time_tz, json_value("_AIRBYTE_DATA", '$."time_no_tz"') as time_no_tz, + json_value("_AIRBYTE_DATA", '$."property_binary_data"') as property_binary_data, "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", @@ -68,6 +69,7 @@ select cast(nullif(time_no_tz, '') as varchar2(4000) ) as time_no_tz, + cast(property_binary_data as varchar2(4000)) as property_binary_data, "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", @@ -119,7 +121,10 @@ select time_tz || '~' || - time_no_tz + time_no_tz || '~' || + + + property_binary_data ) as "_AIRBYTE_EXCHANGE_RATE_HASHID", tmp.* @@ -142,6 +147,7 @@ select datetime_no_tz, time_tz, time_no_tz, + property_binary_data, "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql index 5eaf6186aaab4..fe7ee3aa77ca8 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql @@ -33,12 +33,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_c__lting_into_long_names')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('nested_stream_with_c__lting_into_long_names')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_c__lting_into_long_names')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('nested_stream_with_c__lting_into_long_names')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql index c35233d432cb3..bbca6ea472546 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql @@ -33,12 +33,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('some_stream_that_was_empty')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('some_stream_that_was_empty')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('some_stream_that_was_empty')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('some_stream_that_was_empty')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays.sql index 875d028168620..b5f10293fddf0 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays.sql @@ -2,6 +2,22 @@ indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], unique_key = '_airbyte_ab_id', schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='arrays_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "top-level" ] ) }} -- Final base SQL model diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays_nested_array_parent.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays_nested_array_parent.sql index 73f13e380ac25..f3cfcf08e5175 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays_nested_array_parent.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays_nested_array_parent.sql @@ -1,6 +1,22 @@ {{ config( indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='arrays_nested_array_parent_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "nested" ] ) }} -- Final base SQL model diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_array.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_array.sql index ede71a891dc05..fb14fc0ec02f6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_array.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_array.sql @@ -2,6 +2,22 @@ indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], unique_key = '_airbyte_ab_id', schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + 
identifier='conflict_stream_array_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "top-level" ] ) }} -- Final base SQL model diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name.sql index f203166febe17..518ddd4c80953 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name.sql @@ -2,6 +2,22 @@ indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], unique_key = '_airbyte_ab_id', schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='conflict_stream_name_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "top-level" ] ) }} -- Final base SQL model diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql index 2c221c2940b75..b83744e195fce 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql @@ -1,6 +1,22 @@ {{ config( indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='conflict_stream_name___conflict_stream_name_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "nested" ] ) }} -- Final base SQL model diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql index 195d067ffe415..02b9a967edd16 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql @@ -1,6 +1,22 @@ {{ config( indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='conflict_stream_name_conflict_stream_name_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "nested" ] ) }} -- Final base SQL model diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_scalar.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_scalar.sql index 31f263905b533..d737ff5acba8e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_scalar.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_scalar.sql @@ -2,6 +2,22 @@ indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], unique_key = '_airbyte_ab_id', schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='conflict_stream_scalar_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags 
= [ "top-level" ] ) }} -- Final base SQL model diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql index 8b4cddcd4b179..6451ca096b10e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql @@ -2,6 +2,22 @@ indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], unique_key = '_airbyte_ab_id', schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='non_nested_stream_wi__lting_into_long_names_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "top-level" ] ) }} -- Final base SQL model diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias.sql index 7c113e7291b5d..f1bacbbd7c45d 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias.sql @@ -2,6 +2,22 @@ indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], unique_key = '_airbyte_ab_id', schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='unnest_alias_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "top-level" ] ) }} -- Final base SQL model diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql index ae4165f58160f..2acf4b2bba576 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql @@ -1,6 +1,22 @@ {{ config( indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + 
database=this.database, + schema=this.schema, + identifier='unnest_alias_childre__column___with__quotes_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "nested" ] ) }} -- Final base SQL model diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children.sql index 9f98219880ec5..a86cbfcd476ce 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children.sql @@ -1,6 +1,22 @@ {{ config( indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='unnest_alias_children_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "nested" ] ) }} -- Final base SQL model diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children_owner.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children_owner.sql index 14c766c3dd59f..8fb89e2f4d148 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children_owner.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children_owner.sql @@ -1,6 +1,22 @@ {{ config( indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='unnest_alias_children_owner_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "nested" ] ) }} -- Final base SQL model diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql index 2773af0d8fa35..e768371a824ca 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -21,6 +21,7 @@ select 
jsonb_extract_path_text(_airbyte_data, 'datetime_no_tz') as datetime_no_tz, jsonb_extract_path_text(_airbyte_data, 'time_tz') as time_tz, jsonb_extract_path_text(_airbyte_data, 'time_no_tz') as time_no_tz, + jsonb_extract_path_text(_airbyte_data, 'property_binary_data') as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -65,6 +66,7 @@ select cast(nullif(time_no_tz, '') as time ) as time_no_tz, + cast(decode(property_binary_data, 'base64') as bytea) as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -76,7 +78,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__exchange_rate_ab2 select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') || '-' || coalesce(cast(datetime_tz as text), '') || '-' || coalesce(cast(datetime_no_tz as text), '') || '-' || coalesce(cast(time_tz as text), '') || '-' || coalesce(cast(time_no_tz as text), '') as text)) as _airbyte_exchange_rate_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') || '-' || coalesce(cast(datetime_tz as text), '') || '-' || 
coalesce(cast(datetime_no_tz as text), '') || '-' || coalesce(cast(time_tz as text), '') || '-' || coalesce(cast(time_no_tz as text), '') || '-' || coalesce(cast(property_binary_data as text), '') as text)) as _airbyte_exchange_rate_hashid, tmp.* from __dbt__cte__exchange_rate_ab2 tmp -- exchange_rate @@ -97,6 +99,7 @@ select datetime_no_tz, time_tz, time_no_tz, + property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql index ca2b2520a2585..0041cccaa66c9 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql @@ -20,6 +20,7 @@ select {{ json_extract_scalar('_airbyte_data', ['datetime_no_tz'], ['datetime_no_tz']) }} as datetime_no_tz, {{ json_extract_scalar('_airbyte_data', ['time_tz'], ['time_tz']) }} as time_tz, {{ json_extract_scalar('_airbyte_data', ['time_no_tz'], ['time_no_tz']) }} as time_no_tz, + {{ json_extract_scalar('_airbyte_data', ['property_binary_data'], ['property_binary_data']) }} as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql index 0f457acbee982..e64542dc6d9b0 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql @@ -20,6 +20,7 @@ select cast({{ empty_string_to_null('datetime_no_tz') }} as {{ type_timestamp_without_timezone() }}) as datetime_no_tz, cast({{ empty_string_to_null('time_tz') }} as {{ type_time_with_timezone() }}) as time_tz, cast({{ empty_string_to_null('time_no_tz') }} as {{ type_time_without_timezone() }}) as time_no_tz, + cast(decode(property_binary_data, 'base64') as {{ type_binary() }}) as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql index 789086fe147aa..fe3e0c53b7657 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql @@ -21,6 +21,7 @@ select 
'datetime_no_tz', 'time_tz', 'time_no_tz', + 'property_binary_data', ]) }} as _airbyte_exchange_rate_hashid, tmp.* from {{ ref('exchange_rate_ab2') }} tmp diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql index 01e0c49d1c7c4..ba70ca94c1f11 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql @@ -33,12 +33,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('1_prefix_startwith_number')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('1_prefix_startwith_number')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('1_prefix_startwith_number')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('1_prefix_startwith_number')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 5affe9825e3be..c91e78cb81c28 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -33,12 +33,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('dedup_cdc_excluded')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('dedup_cdc_excluded')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index ef0cf7e1e95f5..d0e7f0fd4a512 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -33,12 +33,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('dedup_exchange_rate')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('dedup_exchange_rate')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql index 77d393c856892..ab9ddd969880b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql @@ -33,12 +33,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('multiple_column_names_conflicts')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('multiple_column_names_conflicts')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('multiple_column_names_conflicts')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('multiple_column_names_conflicts')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql index ff471c6abaab1..074fd47f183fa 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -33,12 +33,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('pos_dedup_cdcx')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('pos_dedup_cdcx')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('pos_dedup_cdcx')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('pos_dedup_cdcx')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index d8da713c68711..4ccea5b060a5c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -33,12 +33,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('renamed_dedup_cdc_excluded')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/types_testing_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/types_testing_scd.sql index 0a0b409c90b72..158c69afaa872 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/types_testing_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/types_testing_scd.sql @@ -33,12 +33,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('types_testing')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('types_testing')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('types_testing')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('types_testing')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql index 72e4956780448..b1542a35dd2f3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -2,6 +2,22 @@ indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], unique_key = '_airbyte_ab_id', schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='exchange_rate_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "top-level" ] ) }} -- Final base SQL model @@ -20,6 +36,7 @@ select datetime_no_tz, time_tz, time_no_tz, + property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 5affe9825e3be..c91e78cb81c28 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -33,12 +33,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('dedup_cdc_excluded')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('dedup_cdc_excluded')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 7e6225fb7cfc4..f37cdb29d9354 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -33,12 +33,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('dedup_exchange_rate')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('dedup_exchange_rate')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 96f720b3d2659..1d8d084ad443f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -33,12 +33,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('renamed_dedup_cdc_excluded')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql index 40b5ffb3f87d9..3ac92c191bf42 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -2,6 +2,22 @@ indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], unique_key = '_airbyte_ab_id', schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='exchange_rate_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "top-level" ] ) }} -- Final base SQL model diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql index 2773af0d8fa35..e768371a824ca 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -21,6 +21,7 @@ select jsonb_extract_path_text(_airbyte_data, 'datetime_no_tz') as datetime_no_tz, jsonb_extract_path_text(_airbyte_data, 'time_tz') as time_tz, jsonb_extract_path_text(_airbyte_data, 'time_no_tz') as time_no_tz, + jsonb_extract_path_text(_airbyte_data, 'property_binary_data') as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -65,6 +66,7 @@ select cast(nullif(time_no_tz, '') as time ) as time_no_tz, + cast(decode(property_binary_data, 'base64') as bytea) as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -76,7 +78,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__exchange_rate_ab2 select - md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') || '-' || coalesce(cast(datetime_tz as text), '') || '-' || coalesce(cast(datetime_no_tz as text), '') || '-' || coalesce(cast(time_tz as text), '') || '-' || coalesce(cast(time_no_tz as text), '') as text)) as _airbyte_exchange_rate_hashid, + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || 
coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') || '-' || coalesce(cast(datetime_tz as text), '') || '-' || coalesce(cast(datetime_no_tz as text), '') || '-' || coalesce(cast(time_tz as text), '') || '-' || coalesce(cast(time_no_tz as text), '') || '-' || coalesce(cast(property_binary_data as text), '') as text)) as _airbyte_exchange_rate_hashid, tmp.* from __dbt__cte__exchange_rate_ab2 tmp -- exchange_rate @@ -97,6 +99,7 @@ select datetime_no_tz, time_tz, time_no_tz, + property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/dbt_project.yml index 767544968e0b7..8fdb7a50a6977 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/dbt_project.yml @@ -51,77 +51,77 @@ dispatch: vars: json_column: _airbyte_data models_to_source: - nested_stream_with_complex_columns_resulting_into_long_names_ab1: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_ab2: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_stg: 
test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_scd: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - non_nested_stream_without_namespace_resulting_into_long_names_ab1: test_normalization_xjvlg._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - non_nested_stream_without_namespace_resulting_into_long_names_ab2: test_normalization_xjvlg._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - non_nested_stream_without_namespace_resulting_into_long_names_ab3: test_normalization_xjvlg._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - non_nested_stream_without_namespace_resulting_into_long_names: test_normalization_xjvlg._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - some_stream_that_was_empty_ab1: test_normalization_xjvlg._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty_ab2: test_normalization_xjvlg._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty_stg: test_normalization_xjvlg._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty_scd: test_normalization_xjvlg._airbyte_raw_some_stream_that_was_empty - some_stream_that_was_empty: test_normalization_xjvlg._airbyte_raw_some_stream_that_was_empty + nested_stream_with_complex_columns_resulting_into_long_names_ab1: test_normalization_iprwf._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_ab2: test_normalization_iprwf._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_stg: 
test_normalization_iprwf._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_scd: test_normalization_iprwf._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names: test_normalization_iprwf._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names_ab1: test_normalization_iprwf._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names_ab2: test_normalization_iprwf._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names_ab3: test_normalization_iprwf._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names: test_normalization_iprwf._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + some_stream_that_was_empty_ab1: test_normalization_iprwf._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_ab2: test_normalization_iprwf._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_stg: test_normalization_iprwf._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_scd: test_normalization_iprwf._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty: test_normalization_iprwf._airbyte_raw_some_stream_that_was_empty simple_stream_with_namespace_resulting_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names simple_stream_with_namespace_resulting_into_long_names_ab2: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names simple_stream_with_namespace_resulting_into_long_names_ab3: 
test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names simple_stream_with_namespace_resulting_into_long_names: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names - conflict_stream_name_ab1: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_name_ab2: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_name_ab3: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_name: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_scalar_ab1: test_normalization_xjvlg._airbyte_raw_conflict_stream_scalar - conflict_stream_scalar_ab2: test_normalization_xjvlg._airbyte_raw_conflict_stream_scalar - conflict_stream_scalar_ab3: test_normalization_xjvlg._airbyte_raw_conflict_stream_scalar - conflict_stream_scalar: test_normalization_xjvlg._airbyte_raw_conflict_stream_scalar - conflict_stream_array_ab1: test_normalization_xjvlg._airbyte_raw_conflict_stream_array - conflict_stream_array_ab2: test_normalization_xjvlg._airbyte_raw_conflict_stream_array - conflict_stream_array_ab3: test_normalization_xjvlg._airbyte_raw_conflict_stream_array - conflict_stream_array: test_normalization_xjvlg._airbyte_raw_conflict_stream_array - unnest_alias_ab1: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_ab2: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_ab3: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias: test_normalization_xjvlg._airbyte_raw_unnest_alias - arrays_ab1: test_normalization_xjvlg._airbyte_raw_arrays - arrays_ab2: test_normalization_xjvlg._airbyte_raw_arrays - arrays_ab3: test_normalization_xjvlg._airbyte_raw_arrays - arrays: test_normalization_xjvlg._airbyte_raw_arrays - nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - 
nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - conflict_stream_name_conflict_stream_name_ab1: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name_ab2: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name_ab3: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - unnest_alias_children_ab1: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_children_ab2: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_children_ab3: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_children: test_normalization_xjvlg._airbyte_raw_unnest_alias - arrays_nested_array_parent_ab1: test_normalization_xjvlg._airbyte_raw_arrays - arrays_nested_array_parent_ab2: test_normalization_xjvlg._airbyte_raw_arrays - arrays_nested_array_parent_ab3: test_normalization_xjvlg._airbyte_raw_arrays - arrays_nested_array_parent: test_normalization_xjvlg._airbyte_raw_arrays - nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - 
nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab2: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab3: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - nested_stream_with_complex_columns_resulting_into_long_names_partition_data: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names - conflict_stream_name_conflict_stream_name_conflict_stream_name_ab1: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name_conflict_stream_name_ab2: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name_conflict_stream_name_ab3: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - conflict_stream_name_conflict_stream_name_conflict_stream_name: test_normalization_xjvlg._airbyte_raw_conflict_stream_name - unnest_alias_children_owner_ab1: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_children_owner_ab2: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_children_owner_ab3: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_children_owner: test_normalization_xjvlg._airbyte_raw_unnest_alias - 
unnest_alias_children_owner_column___with__quotes_ab1: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_children_owner_column___with__quotes_ab2: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_children_owner_column___with__quotes_ab3: test_normalization_xjvlg._airbyte_raw_unnest_alias - unnest_alias_children_owner_column___with__quotes: test_normalization_xjvlg._airbyte_raw_unnest_alias + conflict_stream_name_ab1: test_normalization_iprwf._airbyte_raw_conflict_stream_name + conflict_stream_name_ab2: test_normalization_iprwf._airbyte_raw_conflict_stream_name + conflict_stream_name_ab3: test_normalization_iprwf._airbyte_raw_conflict_stream_name + conflict_stream_name: test_normalization_iprwf._airbyte_raw_conflict_stream_name + conflict_stream_scalar_ab1: test_normalization_iprwf._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab2: test_normalization_iprwf._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab3: test_normalization_iprwf._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar: test_normalization_iprwf._airbyte_raw_conflict_stream_scalar + conflict_stream_array_ab1: test_normalization_iprwf._airbyte_raw_conflict_stream_array + conflict_stream_array_ab2: test_normalization_iprwf._airbyte_raw_conflict_stream_array + conflict_stream_array_ab3: test_normalization_iprwf._airbyte_raw_conflict_stream_array + conflict_stream_array: test_normalization_iprwf._airbyte_raw_conflict_stream_array + unnest_alias_ab1: test_normalization_iprwf._airbyte_raw_unnest_alias + unnest_alias_ab2: test_normalization_iprwf._airbyte_raw_unnest_alias + unnest_alias_ab3: test_normalization_iprwf._airbyte_raw_unnest_alias + unnest_alias: test_normalization_iprwf._airbyte_raw_unnest_alias + arrays_ab1: test_normalization_iprwf._airbyte_raw_arrays + arrays_ab2: test_normalization_iprwf._airbyte_raw_arrays + arrays_ab3: test_normalization_iprwf._airbyte_raw_arrays + arrays: test_normalization_iprwf._airbyte_raw_arrays + 
nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1: test_normalization_iprwf._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2: test_normalization_iprwf._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3: test_normalization_iprwf._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition: test_normalization_iprwf._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + conflict_stream_name_conflict_stream_name_ab1: test_normalization_iprwf._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_ab2: test_normalization_iprwf._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_ab3: test_normalization_iprwf._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name: test_normalization_iprwf._airbyte_raw_conflict_stream_name + unnest_alias_children_ab1: test_normalization_iprwf._airbyte_raw_unnest_alias + unnest_alias_children_ab2: test_normalization_iprwf._airbyte_raw_unnest_alias + unnest_alias_children_ab3: test_normalization_iprwf._airbyte_raw_unnest_alias + unnest_alias_children: test_normalization_iprwf._airbyte_raw_unnest_alias + arrays_nested_array_parent_ab1: test_normalization_iprwf._airbyte_raw_arrays + arrays_nested_array_parent_ab2: test_normalization_iprwf._airbyte_raw_arrays + arrays_nested_array_parent_ab3: test_normalization_iprwf._airbyte_raw_arrays + arrays_nested_array_parent: test_normalization_iprwf._airbyte_raw_arrays + nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1: test_normalization_iprwf._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + 
nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2: test_normalization_iprwf._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3: test_normalization_iprwf._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data: test_normalization_iprwf._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1: test_normalization_iprwf._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab2: test_normalization_iprwf._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab3: test_normalization_iprwf._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_data: test_normalization_iprwf._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + conflict_stream_name_conflict_stream_name_conflict_stream_name_ab1: test_normalization_iprwf._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_conflict_stream_name_ab2: test_normalization_iprwf._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_conflict_stream_name_ab3: test_normalization_iprwf._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_conflict_stream_name: test_normalization_iprwf._airbyte_raw_conflict_stream_name + unnest_alias_children_owner_ab1: test_normalization_iprwf._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab2: test_normalization_iprwf._airbyte_raw_unnest_alias + 
unnest_alias_children_owner_ab3: test_normalization_iprwf._airbyte_raw_unnest_alias + unnest_alias_children_owner: test_normalization_iprwf._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes_ab1: test_normalization_iprwf._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes_ab2: test_normalization_iprwf._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes_ab3: test_normalization_iprwf._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes: test_normalization_iprwf._airbyte_raw_unnest_alias diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql new file mode 100644 index 0000000000000..9e9a1605e83cb --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -0,0 +1,15 @@ + + + delete from "normalization_tests".test_normalization_fxlbt."nested_stream_with_complex_columns_resulting_into_long_names_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "nested_stream_with_complex_columns_resulti__dbt_tmp185252546953" + ); + + + insert into "normalization_tests".test_normalization_fxlbt."nested_stream_with_complex_columns_resulting_into_long_names_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "partition", "_airbyte_start_at", "_airbyte_end_at", 
"_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "partition", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid" + from "nested_stream_with_complex_columns_resulti__dbt_tmp185252546953" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql similarity index 85% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index 9b59d6d77c88c..272ce14755843 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -2,7 +2,7 @@ create table - "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" + "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_scd" compound sortkey(_airbyte_active_row,_airbyte_unique_key_scd,_airbyte_emitted_at) @@ -14,8 +14,8 @@ with input_data as ( select * - from "integrationtests"._airbyte_test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_stg" - -- nested_stream_with_complex_columns_resulting_into_long_names from "integrationtests".test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + from "normalization_tests"._airbyte_test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_stg" + -- nested_stream_with_complex_columns_resulting_into_long_names from "normalization_tests".test_normalization_iprwf._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names ), scd_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql deleted file mode 100644 index 184fa2bf11042..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql +++ /dev/null @@ -1,29 +0,0 @@ - - - - create table - "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names" - - - compound sortkey(_airbyte_unique_key,_airbyte_emitted_at) - - as ( - --- Final base SQL model --- depends_on: "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" -select - _airbyte_unique_key, - id, - date, - "partition", - _airbyte_ab_id, - _airbyte_emitted_at, - getdate() as _airbyte_normalized_at, - _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid -from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" --- nested_stream_with_complex_columns_resulting_into_long_names from "integrationtests".test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names -where 1 = 1 -and _airbyte_active_row = 1 - - ); - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names.sql new file mode 100644 index 0000000000000..e3a9ebcfe2483 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names.sql @@ -0,0 +1,15 @@ + + + delete from "normalization_tests".test_normalization_fxlbt."nested_stream_with_complex_columns_resulting_into_long_names" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "nested_stream_with_complex_columns_resulti__dbt_tmp185302582647" + ); + + + insert into "normalization_tests".test_normalization_fxlbt."nested_stream_with_complex_columns_resulting_into_long_names" ("_airbyte_unique_key", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid") + ( + select "_airbyte_unique_key", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid" + from "nested_stream_with_complex_columns_resulti__dbt_tmp185302582647" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql new file mode 100644 index 0000000000000..0c29b9812ff7a --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql @@ -0,0 +1,9 @@ + + + + insert into "normalization_tests".test_normalization_fxlbt."nested_stream_with_complex_columns_resulting_into_long_names_partition" ("_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid", "double_array_data", "data", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_partition_hashid") + ( + select "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid", "double_array_data", "data", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_partition_hashid" + from "nested_stream_with_complex_columns_resulti__dbt_tmp185307922088" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql new file mode 100644 index 0000000000000..63036c1936140 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql @@ -0,0 +1,9 @@ + + + + insert into "normalization_tests".test_normalization_fxlbt."nested_stream_with_complex_columns_resulting_into_long_names_partition_data" ("_airbyte_partition_hashid", 
"currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid") + ( + select "_airbyte_partition_hashid", "currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid" + from "nested_stream_with_complex_columns_resulti__dbt_tmp185315344939" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql new file mode 100644 index 0000000000000..f752bb2b1b746 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql @@ -0,0 +1,9 @@ + + + + insert into "normalization_tests".test_normalization_fxlbt."nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data" ("_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_double_array_data_hashid") + ( + select "_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_double_array_data_hashid" + from "nested_stream_with_complex_columns_resulti__dbt_tmp185315267996" + ) + \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names.sql new file mode 100644 index 0000000000000..a5df79b64a61e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names.sql @@ -0,0 +1,29 @@ + + + + create table + "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names" + + + compound sortkey(_airbyte_unique_key,_airbyte_emitted_at) + + as ( + +-- Final base SQL model +-- depends_on: "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_scd" +select + _airbyte_unique_key, + id, + date, + "partition", + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at, + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid +from "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_scd" +-- nested_stream_with_complex_columns_resulting_into_long_names from "normalization_tests".test_normalization_iprwf._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names +where 1 = 1 +and _airbyte_active_row = 1 + + ); + \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql similarity index 83% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql index 4e1c7b1f39427..c14d680fc4a08 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql @@ -2,7 +2,7 @@ create table - "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" + "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_partition" compound sortkey(_airbyte_emitted_at) @@ -12,7 +12,7 @@ with 
__dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" +-- depends_on: "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_scd" select _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, "partition"."double_array_data" as double_array_data, @@ -20,7 +20,7 @@ select _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" as table_alias +from "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_scd" as table_alias -- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition where 1 = 1 and "partition" is not null @@ -62,7 +62,7 @@ select getdate() as _airbyte_normalized_at, _airbyte_partition_hashid from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3 --- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_scd" where 1 = 1 ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql similarity index 78% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql index e19271e39a6fb..68ff8cc279d96 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql @@ -2,7 +2,7 @@ create table - "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition_data" + "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_partition_data" compound sortkey(_airbyte_emitted_at) @@ -12,13 +12,13 @@ with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: 
"integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" +-- depends_on: "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_partition" with joined as ( select table_alias._airbyte_partition_hashid as _airbyte_hashid, _airbyte_nested_data - from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" as table_alias, table_alias.data as _airbyte_nested_data + from "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_partition" as table_alias, table_alias.data as _airbyte_nested_data ) select _airbyte_partition_hashid, @@ -26,7 +26,7 @@ select _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" as table_alias +from "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_partition" as table_alias -- data at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA left join joined on _airbyte_partition_hashid = joined._airbyte_hashid where 1 = 1 @@ -67,7 +67,7 @@ select getdate() as _airbyte_normalized_at, _airbyte_data_hashid from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab3 --- data at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" +-- data at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_partition" where 1 = 1 ); diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql similarity index 78% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql index 7e38b76f87fe4..fc777ebad858b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql @@ -2,7 +2,7 @@ create table - "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data" + 
"normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data" compound sortkey(_airbyte_emitted_at) @@ -12,13 +12,13 @@ with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" +-- depends_on: "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_partition" with joined as ( select table_alias._airbyte_partition_hashid as _airbyte_hashid, _airbyte_nested_data - from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" as table_alias, table_alias.double_array_data as _airbyte_nested_data + from "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_partition" as table_alias, table_alias.double_array_data as _airbyte_nested_data ) select _airbyte_partition_hashid, @@ -26,7 +26,7 @@ select _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" as table_alias +from "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_partition" as table_alias -- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data left join joined on _airbyte_partition_hashid = joined._airbyte_hashid where 1 = 1 @@ -67,7 +67,7 @@ select getdate() as _airbyte_normalized_at, _airbyte_double_array_data_hashid from 
__dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3 --- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_partition" where 1 = 1 ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql similarity index 83% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql index ed49a5e916064..3fed7058e31b2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql @@ -1,11 +1,11 @@ {{ config( sort = "_airbyte_emitted_at", unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization_xjvlg", + schema = "_airbyte_test_normalization_iprwf", tags = [ "top-level-intermediate" ] ) }} -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization_xjvlg', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} +-- depends_on: {{ source('test_normalization_iprwf', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} select {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, @@ -13,7 +13,7 @@ select _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization_xjvlg', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} as table_alias +from {{ source('test_normalization_iprwf', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} as table_alias -- nested_stream_with_complex_columns_resulting_into_long_names where 1 = 1 {{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql similarity index 94% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql index 19ab94bca1518..8e3c77dc93ae2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql @@ -1,7 +1,7 @@ {{ config( sort = "_airbyte_emitted_at", unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization_xjvlg", + schema = "_airbyte_test_normalization_iprwf", tags = [ "top-level-intermediate" ] ) }} -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql similarity index 95% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql index 18a21b4729811..a8ea2379d7b91 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql @@ -1,6 +1,6 @@ {{ config( sort = "_airbyte_emitted_at", - schema = "_airbyte_test_normalization_xjvlg", + schema = "_airbyte_test_normalization_iprwf", tags = [ "nested-intermediate" ] ) }} -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1.sql similarity index 95% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1.sql index 4cc3285a5f6e2..8ea9f6913c788 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1.sql @@ -1,6 +1,6 @@ {{ config( sort = "_airbyte_emitted_at", - schema = "_airbyte_test_normalization_xjvlg", + schema = "_airbyte_test_normalization_iprwf", tags = [ "nested-intermediate" ] ) }} -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql similarity index 95% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql index 4876b27d7cc0f..5211281853088 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql @@ -1,6 +1,6 @@ {{ config( sort = "_airbyte_emitted_at", - schema = "_airbyte_test_normalization_xjvlg", + schema = "_airbyte_test_normalization_iprwf", tags = [ "nested-intermediate" ] ) }} -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql similarity index 92% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index a629e4de4e5d6..5b2e75c95ffd2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -1,7 +1,7 @@ {{ config( sort = ["_airbyte_active_row", "_airbyte_unique_key_scd", "_airbyte_emitted_at"], unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization_xjvlg", + schema = "test_normalization_iprwf", post_hook = [" {% set final_table_relation = adapter.get_relation( @@ -33,12 +33,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - 
where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key @@ -48,7 +48,7 @@ -- We have to have a non-empty query, so just do a noop delete delete from {{ this }} where 1=0 {% endif %} - ","drop view _airbyte_test_normalization_xjvlg.nested_stream_with_complex_columns_resulting_into_long_names_stg"], + ","drop view _airbyte_test_normalization_iprwf.nested_stream_with_complex_columns_resulting_into_long_names_stg"], tags = [ "top-level" ] ) }} -- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') @@ -59,7 +59,7 @@ new_data as ( select * from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} - -- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization_xjvlg', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} + -- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization_iprwf', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} where 1 = 1 {{ incremental_clause('_airbyte_emitted_at', this) }} ), @@ 
-95,7 +95,7 @@ input_data as ( input_data as ( select * from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} - -- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization_xjvlg', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} + -- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization_iprwf', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} ), {% endif %} scd_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names.sql similarity index 88% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names.sql index f95f159eedc9f..56873588737d6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names.sql @@ -1,7 +1,7 @@ {{ config( sort = ["_airbyte_unique_key", "_airbyte_emitted_at"], unique_key = "_airbyte_unique_key", - schema = "test_normalization_xjvlg", + schema = "test_normalization_iprwf", tags = [ "top-level" ] ) }} -- Final base SQL model @@ -16,7 +16,7 @@ select {{ current_timestamp() }} as _airbyte_normalized_at, _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} --- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization_xjvlg', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} +-- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization_iprwf', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} where 1 = 1 and _airbyte_active_row = 1 {{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql similarity index 95% rename from 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql index 18a73cf63b7f7..63a9656204a6b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql @@ -1,6 +1,6 @@ {{ config( sort = "_airbyte_emitted_at", - schema = "test_normalization_xjvlg", + schema = "test_normalization_iprwf", tags = [ "nested" ] ) }} -- Final base SQL model diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql similarity index 94% rename from 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql index ad3d8a9a61b53..9d250f87e157f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql @@ -1,6 +1,6 @@ {{ config( sort = "_airbyte_emitted_at", - schema = "test_normalization_xjvlg", + schema = "test_normalization_iprwf", tags = [ "nested" ] ) }} -- Final base SQL model diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql similarity index 95% rename from 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql index 2059cb60a01ae..b82979e2eff00 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql @@ -1,6 +1,6 @@ {{ config( sort = "_airbyte_emitted_at", - schema = "test_normalization_xjvlg", + schema = "test_normalization_iprwf", tags = [ "nested" ] ) }} -- Final base SQL model diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/sources.yml index 56faa01c65dc7..9effea5e1d450 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/sources.yml @@ 
-1,13 +1,6 @@ version: 2 sources: -- name: test_normalization_namespace - quoting: - database: true - schema: false - identifier: false - tables: - - name: _airbyte_raw_simple_stream_with_namespace_resulting_into_long_names -- name: test_normalization_xjvlg +- name: test_normalization_iprwf quoting: database: true schema: false @@ -21,3 +14,10 @@ sources: - name: _airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names - name: _airbyte_raw_some_stream_that_was_empty - name: _airbyte_raw_unnest_alias +- name: test_normalization_namespace + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_simple_stream_with_namespace_resulting_into_long_names diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql similarity index 50% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index 45c63e057a5ed..5e536f58f6b80 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -1,13 +1,13 @@ - delete from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" + delete from "normalization_tests".test_normalization_fxlbt."nested_stream_with_complex_columns_resulting_into_long_names_scd" where (_airbyte_unique_key_scd) in ( select (_airbyte_unique_key_scd) from "nested_stream_with_complex_columns_resulti__dbt_tmp" ); - insert into "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "partition", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid") + insert into "normalization_tests".test_normalization_fxlbt."nested_stream_with_complex_columns_resulting_into_long_names_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "partition", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid") ( select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "partition", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", 
"_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid" from "nested_stream_with_complex_columns_resulti__dbt_tmp" diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql new file mode 100644 index 0000000000000..bfdfff49becd0 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -0,0 +1,15 @@ + + + delete from "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "nested_stream_with_complex_columns_resulti__dbt_tmp" + ); + + + insert into "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "partition", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "partition", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid" + from 
"nested_stream_with_complex_columns_resulti__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql deleted file mode 100644 index 9944a91ca6425..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql +++ /dev/null @@ -1,9 +0,0 @@ - - - - insert into "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" ("_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid", "double_array_data", "data", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_partition_hashid") - ( - select "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid", "double_array_data", "data", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_partition_hashid" - from "nested_stream_with_complex_columns_resulti__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql deleted file mode 100644 index 52b4bd4fc5f41..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql +++ /dev/null @@ -1,9 +0,0 @@ - - - - insert into "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition_data" ("_airbyte_partition_hashid", "currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid") - ( - select "_airbyte_partition_hashid", "currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid" - from "nested_stream_with_complex_columns_resulti__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql deleted file mode 100644 index 91aaa5e85cc0a..0000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql +++ 
/dev/null @@ -1,9 +0,0 @@ - - - - insert into "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data" ("_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_double_array_data_hashid") - ( - select "_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_double_array_data_hashid" - from "nested_stream_with_complex_columns_resulti__dbt_tmp" - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names.sql similarity index 50% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names.sql index e32bb140a0990..6a1aaba7abe1b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names.sql @@ -1,13 +1,13 @@ - delete from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names" + delete from "normalization_tests".test_normalization_fxlbt."nested_stream_with_complex_columns_resulting_into_long_names" where (_airbyte_unique_key) in ( select (_airbyte_unique_key) from "nested_stream_with_complex_columns_resulti__dbt_tmp" ); - insert into "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names" ("_airbyte_unique_key", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid") + insert into "normalization_tests".test_normalization_fxlbt."nested_stream_with_complex_columns_resulting_into_long_names" ("_airbyte_unique_key", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid") ( select "_airbyte_unique_key", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid" from "nested_stream_with_complex_columns_resulti__dbt_tmp" diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql new file mode 100644 index 0000000000000..16970f6dddcc9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql @@ -0,0 +1,9 @@ + + + + insert into "normalization_tests".test_normalization_fxlbt."nested_stream_with_complex_columns_resulting_into_long_names_partition" ("_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid", "double_array_data", "data", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_partition_hashid") + ( + select "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid", "double_array_data", "data", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_partition_hashid" + from "nested_stream_with_complex_columns_resulti__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql new file mode 100644 index 0000000000000..f755cf01912ed --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql @@ -0,0 +1,9 @@ + + + + insert into "normalization_tests".test_normalization_fxlbt."nested_stream_with_complex_columns_resulting_into_long_names_partition_data" ("_airbyte_partition_hashid", "currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid") + ( + select "_airbyte_partition_hashid", "currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid" + from "nested_stream_with_complex_columns_resulti__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql new file mode 100644 index 0000000000000..74f49febfe382 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_fxlbt/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql @@ -0,0 +1,9 @@ + + + + insert into "normalization_tests".test_normalization_fxlbt."nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data" ("_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", 
"_airbyte_double_array_data_hashid") + ( + select "_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_double_array_data_hashid" + from "nested_stream_with_complex_columns_resulti__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names.sql new file mode 100644 index 0000000000000..62f298f7d8157 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names.sql @@ -0,0 +1,15 @@ + + + delete from "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "nested_stream_with_complex_columns_resulti__dbt_tmp" + ); + + + insert into "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names" ("_airbyte_unique_key", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid") + ( + select "_airbyte_unique_key", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid" + from "nested_stream_with_complex_columns_resulti__dbt_tmp" + ) + \ No newline at 
end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql new file mode 100644 index 0000000000000..484c32aeb126f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql @@ -0,0 +1,9 @@ + + + + insert into "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_partition" ("_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid", "double_array_data", "data", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_partition_hashid") + ( + select "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid", "double_array_data", "data", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_partition_hashid" + from "nested_stream_with_complex_columns_resulti__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql new file mode 100644 index 0000000000000..15eec675847a5 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql @@ -0,0 +1,9 @@ + + + + insert into "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_partition_data" ("_airbyte_partition_hashid", "currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid") + ( + select "_airbyte_partition_hashid", "currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid" + from "nested_stream_with_complex_columns_resulti__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql new file mode 100644 index 0000000000000..661feb40560e0 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization_iprwf/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql @@ -0,0 +1,9 @@ + + + + insert into "normalization_tests".test_normalization_iprwf."nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data" ("_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_double_array_data_hashid") + ( + select "_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_double_array_data_hashid" + from "nested_stream_with_complex_columns_resulti__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml index c645baf3c5fe8..5682537f147df 100755 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml @@ -51,22 +51,22 @@ dispatch: vars: json_column: _airbyte_data models_to_source: - exchange_rate_ab1: test_normalization_bhhpj._airbyte_raw_exchange_rate - exchange_rate_ab2: test_normalization_bhhpj._airbyte_raw_exchange_rate - exchange_rate_ab3: test_normalization_bhhpj._airbyte_raw_exchange_rate - exchange_rate: test_normalization_bhhpj._airbyte_raw_exchange_rate - dedup_exchange_rate_ab1: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_ab2: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - 
dedup_exchange_rate_stg: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - renamed_dedup_cdc_excluded_ab1: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_ab2: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_stg: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - dedup_cdc_excluded_ab1: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_ab2: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_stg: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_scd: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded + exchange_rate_ab1: test_normalization_spffv._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization_spffv._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization_spffv._airbyte_raw_exchange_rate + exchange_rate: test_normalization_spffv._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization_spffv._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization_spffv._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization_spffv._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization_spffv._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization_spffv._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization_spffv._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: 
test_normalization_spffv._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization_spffv._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization_spffv._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization_spffv._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization_spffv._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization_spffv._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization_spffv._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: test_normalization_spffv._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization_spffv._airbyte_raw_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_dbt_project.yml index 70d0b5b4fa3b6..2b0fdef69673b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_dbt_project.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_dbt_project.yml @@ -51,42 +51,42 @@ dispatch: vars: json_column: _airbyte_data models_to_source: - exchange_rate_ab1: test_normalization_bhhpj._airbyte_raw_exchange_rate - exchange_rate_ab2: test_normalization_bhhpj._airbyte_raw_exchange_rate - exchange_rate_ab3: test_normalization_bhhpj._airbyte_raw_exchange_rate - exchange_rate: test_normalization_bhhpj._airbyte_raw_exchange_rate - dedup_exchange_rate_ab1: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_ab2: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_stg: 
test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate_scd: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - dedup_exchange_rate: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate - renamed_dedup_cdc_excluded_ab1: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_ab2: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_stg: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded_scd: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - renamed_dedup_cdc_excluded: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded - dedup_cdc_excluded_ab1: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_ab2: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_stg: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded_scd: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded - dedup_cdc_excluded: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded - pos_dedup_cdcx_ab1: test_normalization_bhhpj._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_ab2: test_normalization_bhhpj._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_stg: test_normalization_bhhpj._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx_scd: test_normalization_bhhpj._airbyte_raw_pos_dedup_cdcx - pos_dedup_cdcx: test_normalization_bhhpj._airbyte_raw_pos_dedup_cdcx - 1_prefix_startwith_number_ab1: test_normalization_bhhpj._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_ab2: test_normalization_bhhpj._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_stg: test_normalization_bhhpj._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number_scd: test_normalization_bhhpj._airbyte_raw_1_prefix_startwith_number - 1_prefix_startwith_number: test_normalization_bhhpj._airbyte_raw_1_prefix_startwith_number - 
multiple_column_names_conflicts_ab1: test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_ab2: test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_stg: test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts_scd: test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts - multiple_column_names_conflicts: test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts - types_testing_ab1: test_normalization_bhhpj._airbyte_raw_types_testing - types_testing_ab2: test_normalization_bhhpj._airbyte_raw_types_testing - types_testing_stg: test_normalization_bhhpj._airbyte_raw_types_testing - types_testing_scd: test_normalization_bhhpj._airbyte_raw_types_testing - types_testing: test_normalization_bhhpj._airbyte_raw_types_testing + exchange_rate_ab1: test_normalization_spffv._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization_spffv._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization_spffv._airbyte_raw_exchange_rate + exchange_rate: test_normalization_spffv._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization_spffv._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization_spffv._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization_spffv._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization_spffv._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization_spffv._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization_spffv._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization_spffv._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization_spffv._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: 
test_normalization_spffv._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization_spffv._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization_spffv._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization_spffv._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization_spffv._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: test_normalization_spffv._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization_spffv._airbyte_raw_dedup_cdc_excluded + pos_dedup_cdcx_ab1: test_normalization_spffv._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_ab2: test_normalization_spffv._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_stg: test_normalization_spffv._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_scd: test_normalization_spffv._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx: test_normalization_spffv._airbyte_raw_pos_dedup_cdcx + 1_prefix_startwith_number_ab1: test_normalization_spffv._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_ab2: test_normalization_spffv._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_stg: test_normalization_spffv._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_scd: test_normalization_spffv._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number: test_normalization_spffv._airbyte_raw_1_prefix_startwith_number + multiple_column_names_conflicts_ab1: test_normalization_spffv._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_ab2: test_normalization_spffv._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_stg: test_normalization_spffv._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd: test_normalization_spffv._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts: test_normalization_spffv._airbyte_raw_multiple_column_names_conflicts + types_testing_ab1: 
test_normalization_spffv._airbyte_raw_types_testing + types_testing_ab2: test_normalization_spffv._airbyte_raw_types_testing + types_testing_stg: test_normalization_spffv._airbyte_raw_types_testing + types_testing_scd: test_normalization_spffv._airbyte_raw_types_testing + types_testing: test_normalization_spffv._airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization_spffv/dedup_exchange_rate_scd.sql similarity index 89% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization_spffv/dedup_exchange_rate_scd.sql index 3c1032d3297f2..01a95283d6fae 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization_spffv/dedup_exchange_rate_scd.sql @@ -2,7 +2,7 @@ create table - "integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" + "normalization_tests".test_normalization_spffv."dedup_exchange_rate_scd" compound sortkey(_airbyte_active_row,_airbyte_unique_key_scd,_airbyte_emitted_at) @@ -14,8 +14,8 @@ with input_data as ( select * - 
from "integrationtests"._airbyte_test_normalization_bhhpj."dedup_exchange_rate_stg" - -- dedup_exchange_rate from "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate + from "normalization_tests"._airbyte_test_normalization_spffv."dedup_exchange_rate_stg" + -- dedup_exchange_rate from "normalization_tests".test_normalization_spffv._airbyte_raw_dedup_exchange_rate ), scd_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization_vorny/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization_vorny/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..b7fc853243ac3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization_vorny/dedup_exchange_rate_scd.sql @@ -0,0 +1,15 @@ + + + delete from "normalization_tests".test_normalization_vorny."dedup_exchange_rate_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "dedup_exchange_rate_scd__dbt_tmp185609343121" + ); + + + insert into "normalization_tests".test_normalization_vorny."dedup_exchange_rate_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_start_at", 
"_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from "dedup_exchange_rate_scd__dbt_tmp185609343121" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/test_normalization_spffv/dedup_exchange_rate.sql similarity index 58% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/test_normalization_spffv/dedup_exchange_rate.sql index b6903fe4ceb0d..4ce40ca0bb161 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/test_normalization_spffv/dedup_exchange_rate.sql @@ -2,7 +2,7 @@ create table - "integrationtests".test_normalization_bhhpj."dedup_exchange_rate" + "normalization_tests".test_normalization_spffv."dedup_exchange_rate" compound sortkey(_airbyte_unique_key,_airbyte_emitted_at) @@ -10,7 +10,7 @@ as ( -- Final base SQL model --- depends_on: "integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" +-- depends_on: "normalization_tests".test_normalization_spffv."dedup_exchange_rate_scd" select 
_airbyte_unique_key, id, @@ -25,8 +25,8 @@ select _airbyte_emitted_at, getdate() as _airbyte_normalized_at, _airbyte_dedup_exchange_rate_hashid -from "integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" --- dedup_exchange_rate from "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate +from "normalization_tests".test_normalization_spffv."dedup_exchange_rate_scd" +-- dedup_exchange_rate from "normalization_tests".test_normalization_spffv._airbyte_raw_dedup_exchange_rate where 1 = 1 and _airbyte_active_row = 1 diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/test_normalization_vorny/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/test_normalization_vorny/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..17918c5d32d8b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/test_normalization_vorny/dedup_exchange_rate.sql @@ -0,0 +1,15 @@ + + + delete from "normalization_tests".test_normalization_vorny."dedup_exchange_rate" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "dedup_exchange_rate__dbt_tmp185631855953" + ); + + + insert into "normalization_tests".test_normalization_vorny."dedup_exchange_rate" ("_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + ( + select "_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", 
"_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from "dedup_exchange_rate__dbt_tmp185631855953" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization_spffv/exchange_rate.sql similarity index 66% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization_spffv/exchange_rate.sql index e2bd3830cb423..f0cbba5b655d8 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization_spffv/exchange_rate.sql @@ -1,7 +1,7 @@ create table - "integrationtests".test_normalization_bhhpj."exchange_rate__dbt_tmp" + "normalization_tests".test_normalization_spffv."exchange_rate__dbt_tmp" compound sortkey(_airbyte_emitted_at) @@ -11,7 +11,7 @@ with __dbt__cte__exchange_rate_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate +-- depends_on: "normalization_tests".test_normalization_spffv._airbyte_raw_exchange_rate select case when _airbyte_data."id" 
!= '' then _airbyte_data."id" end as id, case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, @@ -22,10 +22,15 @@ select case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, case when _airbyte_data."column`_'with""_quotes" != '' then _airbyte_data."column`_'with""_quotes" end as "column`_'with""_quotes", + case when _airbyte_data."datetime_tz" != '' then _airbyte_data."datetime_tz" end as datetime_tz, + case when _airbyte_data."datetime_no_tz" != '' then _airbyte_data."datetime_no_tz" end as datetime_no_tz, + case when _airbyte_data."time_tz" != '' then _airbyte_data."time_tz" end as time_tz, + case when _airbyte_data."time_no_tz" != '' then _airbyte_data."time_no_tz" end as time_no_tz, + case when _airbyte_data."property_binary_data" != '' then _airbyte_data."property_binary_data" end as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate as table_alias +from "normalization_tests".test_normalization_spffv._airbyte_raw_exchange_rate as table_alias -- exchange_rate where 1 = 1 ), __dbt__cte__exchange_rate_ab2 as ( @@ -41,7 +46,7 @@ select date ) as date, cast(nullif(timestamp_col::varchar, '') as - timestamp with time zone + TIMESTAMPTZ ) as timestamp_col, cast("hkd@spéçiäl & characters" as float @@ -54,6 +59,19 @@ select float ) as usd, cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", + cast(nullif(datetime_tz::varchar, '') as + TIMESTAMPTZ +) as datetime_tz, + cast(nullif(datetime_no_tz::varchar, '') as + TIMESTAMP +) as datetime_no_tz, + cast(nullif(time_tz::varchar, '') as + TIMETZ +) as time_tz, + cast(nullif(time_no_tz::varchar, '') as + TIME +) as time_no_tz, + cast(property_binary_data as text) as property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at @@ 
-65,7 +83,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__exchange_rate_ab2 select - md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_exchange_rate_hashid, + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') || '-' || coalesce(cast(datetime_tz as text), '') || '-' || coalesce(cast(datetime_no_tz as text), '') || '-' || coalesce(cast(time_tz as text), '') || '-' || coalesce(cast(time_no_tz as text), '') || '-' || coalesce(cast(property_binary_data as text), '') as text)) as _airbyte_exchange_rate_hashid, tmp.* from __dbt__cte__exchange_rate_ab2 tmp -- exchange_rate @@ -82,11 +100,16 @@ select nzd, usd, "column`_'with""_quotes", + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + property_binary_data, _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at, _airbyte_exchange_rate_hashid from __dbt__cte__exchange_rate_ab3 --- exchange_rate from "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate +-- exchange_rate from "normalization_tests".test_normalization_spffv._airbyte_raw_exchange_rate where 1 = 1 ); \ No newline at end of file diff 
--git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization_vorny/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization_vorny/exchange_rate.sql new file mode 100644 index 0000000000000..8e60b4e38a6a9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization_vorny/exchange_rate.sql @@ -0,0 +1,115 @@ + + + create table + "normalization_tests".test_normalization_vorny."exchange_rate__dbt_tmp" + + + compound sortkey(_airbyte_emitted_at) + + as ( + +with __dbt__cte__exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "normalization_tests".test_normalization_vorny._airbyte_raw_exchange_rate +select + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, + case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, + case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, + case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", + case when _airbyte_data."HKD_special___characters" != '' then _airbyte_data."HKD_special___characters" end as hkd_special___characters, + case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, + case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, + case when _airbyte_data."column`_'with""_quotes" != '' then _airbyte_data."column`_'with""_quotes" end as 
"column`_'with""_quotes", + case when _airbyte_data."datetime_tz" != '' then _airbyte_data."datetime_tz" end as datetime_tz, + case when _airbyte_data."datetime_no_tz" != '' then _airbyte_data."datetime_no_tz" end as datetime_no_tz, + case when _airbyte_data."time_tz" != '' then _airbyte_data."time_tz" end as time_tz, + case when _airbyte_data."time_no_tz" != '' then _airbyte_data."time_no_tz" end as time_no_tz, + case when _airbyte_data."property_binary_data" != '' then _airbyte_data."property_binary_data" end as property_binary_data, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from "normalization_tests".test_normalization_vorny._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__exchange_rate_ab1 +select + cast(id as + bigint +) as id, + cast(currency as text) as currency, + cast(nullif(date::varchar, '') as + date +) as date, + cast(nullif(timestamp_col::varchar, '') as + TIMESTAMPTZ +) as timestamp_col, + cast("hkd@spéçiäl & characters" as + float +) as "hkd@spéçiäl & characters", + cast(hkd_special___characters as text) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", + cast(nullif(datetime_tz::varchar, '') as + TIMESTAMPTZ +) as datetime_tz, + cast(nullif(datetime_no_tz::varchar, '') as + TIMESTAMP +) as datetime_no_tz, + cast(nullif(time_tz::varchar, '') as + TIMETZ +) as time_tz, + cast(nullif(time_no_tz::varchar, '') as + TIME +) as time_no_tz, + cast(property_binary_data as text) as property_binary_data, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from __dbt__cte__exchange_rate_ab1 +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab3 as ( + +-- SQL model to build a hash column 
based on the values of this record +-- depends_on: __dbt__cte__exchange_rate_ab2 +select + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') || '-' || coalesce(cast(datetime_tz as text), '') || '-' || coalesce(cast(datetime_no_tz as text), '') || '-' || coalesce(cast(time_tz as text), '') || '-' || coalesce(cast(time_no_tz as text), '') || '-' || coalesce(cast(property_binary_data as text), '') as text)) as _airbyte_exchange_rate_hashid, + tmp.* +from __dbt__cte__exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__exchange_rate_ab3 +select + id, + currency, + date, + timestamp_col, + "hkd@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, + "column`_'with""_quotes", + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + property_binary_data, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from __dbt__cte__exchange_rate_ab3 +-- exchange_rate from "normalization_tests".test_normalization_vorny._airbyte_raw_exchange_rate +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization_spffv/dedup_exchange_rate_stg.sql similarity index 88% rename from 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization_spffv/dedup_exchange_rate_stg.sql index 903a3141f6256..460ee061096ab 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization_spffv/dedup_exchange_rate_stg.sql @@ -1,11 +1,11 @@ - create view "integrationtests"._airbyte_test_normalization_bhhpj."dedup_exchange_rate_stg__dbt_tmp" as ( + create view "normalization_tests"._airbyte_test_normalization_spffv."dedup_exchange_rate_stg__dbt_tmp" as ( with __dbt__cte__dedup_exchange_rate_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate +-- depends_on: "normalization_tests".test_normalization_spffv._airbyte_raw_dedup_exchange_rate select case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, @@ -18,7 +18,7 @@ select _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate as table_alias +from "normalization_tests".test_normalization_spffv._airbyte_raw_dedup_exchange_rate as table_alias -- dedup_exchange_rate where 1 = 1 @@ -35,7 +35,7 @@ select date ) as date, 
cast(nullif(timestamp_col::varchar, '') as - timestamp with time zone + TIMESTAMPTZ ) as timestamp_col, cast("hkd@spéçiäl & characters" as float diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization_spffv/multiple_column_names_conflicts_stg.sql similarity index 87% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization_spffv/multiple_column_names_conflicts_stg.sql index b496abf0c5ecd..658659ec2b8e2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization_spffv/multiple_column_names_conflicts_stg.sql @@ -1,11 +1,11 @@ - create view "integrationtests"._airbyte_test_normalization_bhhpj."multiple_column_names_conflicts_stg__dbt_tmp" as ( + create view "normalization_tests"._airbyte_test_normalization_spffv."multiple_column_names_conflicts_stg__dbt_tmp" as ( with __dbt__cte__multiple_column_names_conflicts_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: 
"integrationtests".test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts +-- depends_on: "normalization_tests".test_normalization_spffv._airbyte_raw_multiple_column_names_conflicts select case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, case when _airbyte_data."User Id" != '' then _airbyte_data."User Id" end as "user id", @@ -17,7 +17,7 @@ select _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts as table_alias +from "normalization_tests".test_normalization_spffv._airbyte_raw_multiple_column_names_conflicts as table_alias -- multiple_column_names_conflicts where 1 = 1 diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization_vorny/dedup_exchange_rate_stg.sql similarity index 88% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization_vorny/dedup_exchange_rate_stg.sql index 903a3141f6256..7cb258900a1fb 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization_vorny/dedup_exchange_rate_stg.sql @@ -1,11 +1,11 @@ - create view "integrationtests"._airbyte_test_normalization_bhhpj."dedup_exchange_rate_stg__dbt_tmp" as ( + create view "normalization_tests"._airbyte_test_normalization_vorny."dedup_exchange_rate_stg__dbt_tmp" as ( with __dbt__cte__dedup_exchange_rate_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate +-- depends_on: "normalization_tests".test_normalization_vorny._airbyte_raw_dedup_exchange_rate select case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, @@ -18,7 +18,7 @@ select _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate as table_alias +from "normalization_tests".test_normalization_vorny._airbyte_raw_dedup_exchange_rate as table_alias -- dedup_exchange_rate where 1 = 1 @@ -35,7 +35,7 @@ select date ) as date, cast(nullif(timestamp_col::varchar, '') as - timestamp with time zone + TIMESTAMPTZ ) as timestamp_col, cast("hkd@spéçiäl & characters" as float diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization_vorny/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization_vorny/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..cdcb4ed52ddbc --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization_vorny/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,62 @@ + + + create view "normalization_tests"._airbyte_test_normalization_vorny."multiple_column_names_conflicts_stg__dbt_tmp" as ( + +with __dbt__cte__multiple_column_names_conflicts_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "normalization_tests".test_normalization_vorny._airbyte_raw_multiple_column_names_conflicts +select + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."User Id" != '' then _airbyte_data."User Id" end as "user id", + case when _airbyte_data."user_id" != '' then _airbyte_data."user_id" end as user_id, + case when _airbyte_data."User id" != '' then _airbyte_data."User id" end as "user id_1", + case when _airbyte_data."user id" != '' then _airbyte_data."user id" end as "user id_2", + case when _airbyte_data."User@Id" != '' then _airbyte_data."User@Id" end as "user@id", + case when _airbyte_data."UserId" != '' then _airbyte_data."UserId" end as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from "normalization_tests".test_normalization_vorny._airbyte_raw_multiple_column_names_conflicts as table_alias +-- multiple_column_names_conflicts +where 1 = 1 + +), __dbt__cte__multiple_column_names_conflicts_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1 +select + cast(id as + bigint +) as id, + cast("user id" as text) as "user id", + cast(user_id as + float +) as user_id, + cast("user id_1" as + float +) as "user id_1", + cast("user id_2" as + float +) as "user id_2", + cast("user@id" as text) as "user@id", + 
cast(userid as + float +) as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from __dbt__cte__multiple_column_names_conflicts_ab1 +-- multiple_column_names_conflicts +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2 +select + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast("user id" as text), '') || '-' || coalesce(cast(user_id as text), '') || '-' || coalesce(cast("user id_1" as text), '') || '-' || coalesce(cast("user id_2" as text), '') || '-' || coalesce(cast("user@id" as text), '') || '-' || coalesce(cast(userid as text), '') as text)) as _airbyte_multiple_column_names_conflicts_hashid, + tmp.* +from __dbt__cte__multiple_column_names_conflicts_ab2 tmp +-- multiple_column_names_conflicts +where 1 = 1 + + ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization_spffv/dedup_exchange_rate_ab1.sql similarity index 87% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization_spffv/dedup_exchange_rate_ab1.sql index b8200f8bf6791..0892b57e079ba 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization_spffv/dedup_exchange_rate_ab1.sql @@ -1,11 +1,11 @@ {{ config( sort = "_airbyte_emitted_at", unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization_bhhpj", + schema = "_airbyte_test_normalization_spffv", tags = [ "top-level-intermediate" ] ) }} -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} +-- depends_on: {{ source('test_normalization_spffv', '_airbyte_raw_dedup_exchange_rate') }} select {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, @@ -18,7 +18,7 @@ select _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} as table_alias +from {{ source('test_normalization_spffv', '_airbyte_raw_dedup_exchange_rate') }} as table_alias -- dedup_exchange_rate where 1 = 1 {{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization_spffv/dedup_exchange_rate_ab2.sql similarity index 95% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql rename to 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization_spffv/dedup_exchange_rate_ab2.sql index 420c7c9869752..6b5d4f79462e6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization_spffv/dedup_exchange_rate_ab2.sql @@ -1,7 +1,7 @@ {{ config( sort = "_airbyte_emitted_at", unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization_bhhpj", + schema = "_airbyte_test_normalization_spffv", tags = [ "top-level-intermediate" ] ) }} -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization_spffv/dedup_exchange_rate_scd.sql similarity index 92% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization_spffv/dedup_exchange_rate_scd.sql index b716e29bdf6ef..febb267c39805 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization_spffv/dedup_exchange_rate_scd.sql @@ -1,7 +1,7 @@ {{ config( sort = ["_airbyte_active_row", "_airbyte_unique_key_scd", "_airbyte_emitted_at"], unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization_bhhpj", + schema = "test_normalization_spffv", post_hook = [" {% set final_table_relation = adapter.get_relation( @@ -33,12 +33,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('dedup_exchange_rate')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('dedup_exchange_rate')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key @@ -48,7 +48,7 @@ -- We have to have a non-empty query, so just do a noop delete delete from {{ this }} where 1=0 {% endif %} - ","drop view _airbyte_test_normalization_bhhpj.dedup_exchange_rate_stg"], + ","drop view _airbyte_test_normalization_spffv.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} -- depends_on: ref('dedup_exchange_rate_stg') @@ -59,7 +59,7 @@ new_data as ( select * from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} + -- dedup_exchange_rate from {{ source('test_normalization_spffv', '_airbyte_raw_dedup_exchange_rate') }} where 1 = 1 {{ incremental_clause('_airbyte_emitted_at', this) }} ), @@ -97,7 +97,7 @@ input_data as ( input_data as ( select * from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} + -- dedup_exchange_rate from {{ source('test_normalization_spffv', '_airbyte_raw_dedup_exchange_rate') }} ), {% endif %} scd_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/test_normalization_spffv/dedup_exchange_rate.sql similarity index 87% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql rename to 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/test_normalization_spffv/dedup_exchange_rate.sql index 8f8fd8c8e9bc7..f3ceea61d3af3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/test_normalization_spffv/dedup_exchange_rate.sql @@ -1,7 +1,7 @@ {{ config( sort = ["_airbyte_unique_key", "_airbyte_emitted_at"], unique_key = "_airbyte_unique_key", - schema = "test_normalization_bhhpj", + schema = "test_normalization_spffv", tags = [ "top-level" ] ) }} -- Final base SQL model @@ -21,7 +21,7 @@ select {{ current_timestamp() }} as _airbyte_normalized_at, _airbyte_dedup_exchange_rate_hashid from {{ ref('dedup_exchange_rate_scd') }} --- dedup_exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} +-- dedup_exchange_rate from {{ source('test_normalization_spffv', '_airbyte_raw_dedup_exchange_rate') }} where 1 = 1 and _airbyte_active_row = 1 {{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index a66a0b168c2e4..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,26 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - unique_key = '_airbyte_ab_id', - schema = "test_normalization_bhhpj", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('exchange_rate_ab3') }} -select - id, - currency, - date, - timestamp_col, - {{ adapter.quote('hkd@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, - {{ adapter.quote('column`_\'with""_quotes') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from {{ ref('exchange_rate_ab3') }} --- exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_exchange_rate') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_tables/test_normalization_spffv/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_tables/test_normalization_spffv/exchange_rate.sql new file mode 100644 index 0000000000000..1a1460e51a6fc --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_tables/test_normalization_spffv/exchange_rate.sql @@ -0,0 +1,47 @@ +{{ config( + sort = "_airbyte_emitted_at", + unique_key = '_airbyte_ab_id', + schema = "test_normalization_spffv", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='exchange_rate_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], + 
tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('exchange_rate_ab3') }} +select + id, + currency, + date, + timestamp_col, + {{ adapter.quote('hkd@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + {{ adapter.quote('column`_\'with""_quotes') }}, + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + property_binary_data, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from {{ ref('exchange_rate_ab3') }} +-- exchange_rate from {{ source('test_normalization_spffv', '_airbyte_raw_exchange_rate') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization_spffv/dedup_exchange_rate_stg.sql similarity index 93% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization_spffv/dedup_exchange_rate_stg.sql index db45cc80a67aa..b20dfdcfd9995 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization_spffv/dedup_exchange_rate_stg.sql @@ -1,7 +1,7 @@ {{ config( sort = 
"_airbyte_emitted_at", unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization_bhhpj", + schema = "_airbyte_test_normalization_spffv", tags = [ "top-level-intermediate" ] ) }} -- SQL model to build a hash column based on the values of this record diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/sources.yml index 6aa768851a80c..3b4123fceff3b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/sources.yml @@ -1,6 +1,6 @@ version: 2 sources: -- name: test_normalization_bhhpj +- name: test_normalization_spffv quoting: database: true schema: false diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization_spffv/dedup_exchange_rate_ab1.sql similarity index 87% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization_spffv/dedup_exchange_rate_ab1.sql index cfb1d029d88ff..ffff62aa4e594 100644 
--- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization_spffv/dedup_exchange_rate_ab1.sql @@ -1,11 +1,11 @@ {{ config( sort = "_airbyte_emitted_at", unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization_bhhpj", + schema = "_airbyte_test_normalization_spffv", tags = [ "top-level-intermediate" ] ) }} -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} +-- depends_on: {{ source('test_normalization_spffv', '_airbyte_raw_dedup_exchange_rate') }} select {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, @@ -18,7 +18,7 @@ select _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at -from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} as table_alias +from {{ source('test_normalization_spffv', '_airbyte_raw_dedup_exchange_rate') }} as table_alias -- dedup_exchange_rate where 1 = 1 {{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization_spffv/dedup_exchange_rate_ab2.sql similarity index 95% rename from 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization_spffv/dedup_exchange_rate_ab2.sql index 2a9275c69a1ec..7a309e0955fae 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization_spffv/dedup_exchange_rate_ab2.sql @@ -1,7 +1,7 @@ {{ config( sort = "_airbyte_emitted_at", unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization_bhhpj", + schema = "_airbyte_test_normalization_spffv", tags = [ "top-level-intermediate" ] ) }} -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization_spffv/dedup_exchange_rate_scd.sql similarity index 92% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql rename to 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization_spffv/dedup_exchange_rate_scd.sql index 9f8c382ff834b..05de45b0a0a7d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization_spffv/dedup_exchange_rate_scd.sql @@ -1,7 +1,7 @@ {{ config( sort = ["_airbyte_active_row", "_airbyte_unique_key_scd", "_airbyte_emitted_at"], unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization_bhhpj", + schema = "test_normalization_spffv", post_hook = [" {% set final_table_relation = adapter.get_relation( @@ -33,12 +33,12 @@ from ( select distinct _airbyte_unique_key as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('dedup_exchange_rate')) }} ) recent_records left join ( select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count from {{ this }} - where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('dedup_exchange_rate')) }} group by _airbyte_unique_key ) active_counts on recent_records.unique_key = active_counts.unique_key @@ -48,7 +48,7 @@ -- We have to have a non-empty query, so just do a noop delete delete from {{ this }} where 1=0 {% endif %} - ","drop view _airbyte_test_normalization_bhhpj.dedup_exchange_rate_stg"], + ","drop view _airbyte_test_normalization_spffv.dedup_exchange_rate_stg"], tags = [ "top-level" ] ) }} -- depends_on: ref('dedup_exchange_rate_stg') @@ -59,7 +59,7 @@ new_data as ( select * from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} + -- dedup_exchange_rate from {{ source('test_normalization_spffv', '_airbyte_raw_dedup_exchange_rate') }} where 1 = 1 {{ incremental_clause('_airbyte_emitted_at', this) }} ), @@ -97,7 +97,7 @@ input_data as ( input_data as ( select * from {{ ref('dedup_exchange_rate_stg') }} - -- dedup_exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} + -- dedup_exchange_rate from {{ source('test_normalization_spffv', '_airbyte_raw_dedup_exchange_rate') }} ), {% endif %} scd_data as ( diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization_spffv/dedup_exchange_rate.sql similarity index 87% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql rename to 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization_spffv/dedup_exchange_rate.sql index c5fed3b30237f..14a199ed47a88 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization_spffv/dedup_exchange_rate.sql @@ -1,7 +1,7 @@ {{ config( sort = ["_airbyte_unique_key", "_airbyte_emitted_at"], unique_key = "_airbyte_unique_key", - schema = "test_normalization_bhhpj", + schema = "test_normalization_spffv", tags = [ "top-level" ] ) }} -- Final base SQL model @@ -21,7 +21,7 @@ select {{ current_timestamp() }} as _airbyte_normalized_at, _airbyte_dedup_exchange_rate_hashid from {{ ref('dedup_exchange_rate_scd') }} --- dedup_exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} +-- dedup_exchange_rate from {{ source('test_normalization_spffv', '_airbyte_raw_dedup_exchange_rate') }} where 1 = 1 and _airbyte_active_row = 1 {{ incremental_clause('_airbyte_emitted_at', this) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql deleted file mode 100644 index 9a7a498cc3754..0000000000000 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,26 +0,0 @@ -{{ config( - sort = "_airbyte_emitted_at", - unique_key = '_airbyte_ab_id', - schema = "test_normalization_bhhpj", - tags = [ "top-level" ] -) }} --- Final base SQL model --- depends_on: {{ ref('exchange_rate_ab3') }} -select - id, - currency, - new_column, - date, - timestamp_col, - {{ adapter.quote('hkd@spéçiäl & characters') }}, - nzd, - usd, - {{ adapter.quote('column`_\'with""_quotes') }}, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from {{ ref('exchange_rate_ab3') }} --- exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_exchange_rate') }} -where 1 = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization_spffv/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization_spffv/exchange_rate.sql new file mode 100644 index 0000000000000..6d587ef82dcc8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization_spffv/exchange_rate.sql @@ -0,0 +1,42 @@ +{{ config( + sort = "_airbyte_emitted_at", + unique_key = '_airbyte_ab_id', + schema = "test_normalization_spffv", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='exchange_rate_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} 
+ {% endif %} + "], + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('exchange_rate_ab3') }} +select + id, + currency, + new_column, + date, + timestamp_col, + {{ adapter.quote('hkd@spéçiäl & characters') }}, + nzd, + usd, + {{ adapter.quote('column`_\'with""_quotes') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from {{ ref('exchange_rate_ab3') }} +-- exchange_rate from {{ source('test_normalization_spffv', '_airbyte_raw_exchange_rate') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization_spffv/dedup_exchange_rate_stg.sql similarity index 93% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization_spffv/dedup_exchange_rate_stg.sql index 9d10a9ea94901..a2070c639bc4a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization_spffv/dedup_exchange_rate_stg.sql @@ -1,7 +1,7 @@ {{ config( sort = "_airbyte_emitted_at", 
unique_key = '_airbyte_ab_id', - schema = "_airbyte_test_normalization_bhhpj", + schema = "_airbyte_test_normalization_spffv", tags = [ "top-level-intermediate" ] ) }} -- SQL model to build a hash column based on the values of this record diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/sources.yml index 4daf898b3002b..3d8790bc81634 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/sources.yml +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/sources.yml @@ -1,6 +1,6 @@ version: 2 sources: -- name: test_normalization_bhhpj +- name: test_normalization_spffv quoting: database: true schema: false diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization_spffv/dedup_exchange_rate_scd.sql similarity index 52% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization_spffv/dedup_exchange_rate_scd.sql index de775a2e5c164..ea7873846f77a 
100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization_spffv/dedup_exchange_rate_scd.sql @@ -1,13 +1,13 @@ - delete from "integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" + delete from "normalization_tests".test_normalization_spffv."dedup_exchange_rate_scd" where (_airbyte_unique_key_scd) in ( select (_airbyte_unique_key_scd) from "dedup_exchange_rate_scd__dbt_tmp" ); - insert into "integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + insert into "normalization_tests".test_normalization_spffv."dedup_exchange_rate_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") ( select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" from "dedup_exchange_rate_scd__dbt_tmp" diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization_vorny/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization_vorny/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..ae44ccfc5883d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization_vorny/dedup_exchange_rate_scd.sql @@ -0,0 +1,15 @@ + + + delete from "normalization_tests".test_normalization_vorny."dedup_exchange_rate_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "dedup_exchange_rate_scd__dbt_tmp" + ); + + + insert into "normalization_tests".test_normalization_vorny."dedup_exchange_rate_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from "dedup_exchange_rate_scd__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/test_normalization_spffv/dedup_exchange_rate.sql similarity index 52% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/test_normalization_spffv/dedup_exchange_rate.sql index 372889fb42bda..eb09558982a0e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/test_normalization_spffv/dedup_exchange_rate.sql @@ -1,13 +1,13 @@ - delete from "integrationtests".test_normalization_bhhpj."dedup_exchange_rate" + delete from "normalization_tests".test_normalization_spffv."dedup_exchange_rate" where (_airbyte_unique_key) in ( select (_airbyte_unique_key) from "dedup_exchange_rate__dbt_tmp" ); - insert into "integrationtests".test_normalization_bhhpj."dedup_exchange_rate" ("_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + insert into "normalization_tests".test_normalization_spffv."dedup_exchange_rate" ("_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", 
"_airbyte_dedup_exchange_rate_hashid") ( select "_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" from "dedup_exchange_rate__dbt_tmp" diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/test_normalization_vorny/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/test_normalization_vorny/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..24a1407f649b4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/test_normalization_vorny/dedup_exchange_rate.sql @@ -0,0 +1,15 @@ + + + delete from "normalization_tests".test_normalization_vorny."dedup_exchange_rate" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "dedup_exchange_rate__dbt_tmp" + ); + + + insert into "normalization_tests".test_normalization_vorny."dedup_exchange_rate" ("_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + ( + select "_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from "dedup_exchange_rate__dbt_tmp" + ) + \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization_spffv/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization_spffv/exchange_rate.sql new file mode 100644 index 0000000000000..f0cbba5b655d8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization_spffv/exchange_rate.sql @@ -0,0 +1,115 @@ + + + create table + "normalization_tests".test_normalization_spffv."exchange_rate__dbt_tmp" + + + compound sortkey(_airbyte_emitted_at) + + as ( + +with __dbt__cte__exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "normalization_tests".test_normalization_spffv._airbyte_raw_exchange_rate +select + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, + case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, + case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, + case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", + case when _airbyte_data."HKD_special___characters" != '' then _airbyte_data."HKD_special___characters" end as hkd_special___characters, + case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, + case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, + case when _airbyte_data."column`_'with""_quotes" != '' then _airbyte_data."column`_'with""_quotes" end as "column`_'with""_quotes", 
+ case when _airbyte_data."datetime_tz" != '' then _airbyte_data."datetime_tz" end as datetime_tz, + case when _airbyte_data."datetime_no_tz" != '' then _airbyte_data."datetime_no_tz" end as datetime_no_tz, + case when _airbyte_data."time_tz" != '' then _airbyte_data."time_tz" end as time_tz, + case when _airbyte_data."time_no_tz" != '' then _airbyte_data."time_no_tz" end as time_no_tz, + case when _airbyte_data."property_binary_data" != '' then _airbyte_data."property_binary_data" end as property_binary_data, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from "normalization_tests".test_normalization_spffv._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__exchange_rate_ab1 +select + cast(id as + bigint +) as id, + cast(currency as text) as currency, + cast(nullif(date::varchar, '') as + date +) as date, + cast(nullif(timestamp_col::varchar, '') as + TIMESTAMPTZ +) as timestamp_col, + cast("hkd@spéçiäl & characters" as + float +) as "hkd@spéçiäl & characters", + cast(hkd_special___characters as text) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", + cast(nullif(datetime_tz::varchar, '') as + TIMESTAMPTZ +) as datetime_tz, + cast(nullif(datetime_no_tz::varchar, '') as + TIMESTAMP +) as datetime_no_tz, + cast(nullif(time_tz::varchar, '') as + TIMETZ +) as time_tz, + cast(nullif(time_no_tz::varchar, '') as + TIME +) as time_no_tz, + cast(property_binary_data as text) as property_binary_data, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from __dbt__cte__exchange_rate_ab1 +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab3 as ( + +-- SQL model to build a hash column based on the values of 
this record +-- depends_on: __dbt__cte__exchange_rate_ab2 +select + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') || '-' || coalesce(cast(datetime_tz as text), '') || '-' || coalesce(cast(datetime_no_tz as text), '') || '-' || coalesce(cast(time_tz as text), '') || '-' || coalesce(cast(time_no_tz as text), '') || '-' || coalesce(cast(property_binary_data as text), '') as text)) as _airbyte_exchange_rate_hashid, + tmp.* +from __dbt__cte__exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__exchange_rate_ab3 +select + id, + currency, + date, + timestamp_col, + "hkd@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, + "column`_'with""_quotes", + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + property_binary_data, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from __dbt__cte__exchange_rate_ab3 +-- exchange_rate from "normalization_tests".test_normalization_spffv._airbyte_raw_exchange_rate +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization_vorny/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization_vorny/exchange_rate.sql new file mode 100644 index 0000000000000..8e60b4e38a6a9 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization_vorny/exchange_rate.sql @@ -0,0 +1,115 @@ + + + create table + "normalization_tests".test_normalization_vorny."exchange_rate__dbt_tmp" + + + compound sortkey(_airbyte_emitted_at) + + as ( + +with __dbt__cte__exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "normalization_tests".test_normalization_vorny._airbyte_raw_exchange_rate +select + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, + case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, + case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, + case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", + case when _airbyte_data."HKD_special___characters" != '' then _airbyte_data."HKD_special___characters" end as hkd_special___characters, + case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, + case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, + case when _airbyte_data."column`_'with""_quotes" != '' then _airbyte_data."column`_'with""_quotes" end as "column`_'with""_quotes", + case when _airbyte_data."datetime_tz" != '' then _airbyte_data."datetime_tz" end as datetime_tz, + case when _airbyte_data."datetime_no_tz" != '' then _airbyte_data."datetime_no_tz" end as datetime_no_tz, + case when _airbyte_data."time_tz" != '' then _airbyte_data."time_tz" end as time_tz, + case when _airbyte_data."time_no_tz" != '' then _airbyte_data."time_no_tz" end as time_no_tz, + case when _airbyte_data."property_binary_data" != '' then 
_airbyte_data."property_binary_data" end as property_binary_data, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from "normalization_tests".test_normalization_vorny._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__exchange_rate_ab1 +select + cast(id as + bigint +) as id, + cast(currency as text) as currency, + cast(nullif(date::varchar, '') as + date +) as date, + cast(nullif(timestamp_col::varchar, '') as + TIMESTAMPTZ +) as timestamp_col, + cast("hkd@spéçiäl & characters" as + float +) as "hkd@spéçiäl & characters", + cast(hkd_special___characters as text) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", + cast(nullif(datetime_tz::varchar, '') as + TIMESTAMPTZ +) as datetime_tz, + cast(nullif(datetime_no_tz::varchar, '') as + TIMESTAMP +) as datetime_no_tz, + cast(nullif(time_tz::varchar, '') as + TIMETZ +) as time_tz, + cast(nullif(time_no_tz::varchar, '') as + TIME +) as time_no_tz, + cast(property_binary_data as text) as property_binary_data, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from __dbt__cte__exchange_rate_ab1 +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__exchange_rate_ab2 +select + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as 
text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') || '-' || coalesce(cast(datetime_tz as text), '') || '-' || coalesce(cast(datetime_no_tz as text), '') || '-' || coalesce(cast(time_tz as text), '') || '-' || coalesce(cast(time_no_tz as text), '') || '-' || coalesce(cast(property_binary_data as text), '') as text)) as _airbyte_exchange_rate_hashid, + tmp.* +from __dbt__cte__exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__exchange_rate_ab3 +select + id, + currency, + date, + timestamp_col, + "hkd@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, + "column`_'with""_quotes", + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + property_binary_data, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from __dbt__cte__exchange_rate_ab3 +-- exchange_rate from "normalization_tests".test_normalization_vorny._airbyte_raw_exchange_rate +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization_spffv/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization_spffv/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..460ee061096ab --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization_spffv/dedup_exchange_rate_stg.sql @@ -0,0 +1,66 @@ + + + create view "normalization_tests"._airbyte_test_normalization_spffv."dedup_exchange_rate_stg__dbt_tmp" as ( + +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into 
separated field columns as described by the JSON Schema +-- depends_on: "normalization_tests".test_normalization_spffv._airbyte_raw_dedup_exchange_rate +select + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, + case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, + case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, + case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", + case when _airbyte_data."HKD_special___characters" != '' then _airbyte_data."HKD_special___characters" end as hkd_special___characters, + case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, + case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from "normalization_tests".test_normalization_spffv._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__dedup_exchange_rate_ab1 +select + cast(id as + bigint +) as id, + cast(currency as text) as currency, + cast(nullif(date::varchar, '') as + date +) as date, + cast(nullif(timestamp_col::varchar, '') as + TIMESTAMPTZ +) as timestamp_col, + cast("hkd@spéçiäl & characters" as + float +) as "hkd@spéçiäl & characters", + cast(hkd_special___characters as text) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this 
record +-- depends_on: __dbt__cte__dedup_exchange_rate_ab2 +select + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') as text)) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from __dbt__cte__dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 + + ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization_vorny/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization_vorny/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..7cb258900a1fb --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization_vorny/dedup_exchange_rate_stg.sql @@ -0,0 +1,66 @@ + + + create view "normalization_tests"._airbyte_test_normalization_vorny."dedup_exchange_rate_stg__dbt_tmp" as ( + +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "normalization_tests".test_normalization_vorny._airbyte_raw_dedup_exchange_rate +select + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, + case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, + case when 
_airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, + case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", + case when _airbyte_data."HKD_special___characters" != '' then _airbyte_data."HKD_special___characters" end as hkd_special___characters, + case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, + case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from "normalization_tests".test_normalization_vorny._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__dedup_exchange_rate_ab1 +select + cast(id as + bigint +) as id, + cast(currency as text) as currency, + cast(nullif(date::varchar, '') as + date +) as date, + cast(nullif(timestamp_col::varchar, '') as + TIMESTAMPTZ +) as timestamp_col, + cast("hkd@spéçiäl & characters" as + float +) as "hkd@spéçiäl & characters", + cast(hkd_special___characters as text) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__dedup_exchange_rate_ab2 +select + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as 
text), '') || '-' || coalesce(cast(usd as text), '') as text)) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from __dbt__cte__dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 + + ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization_spffv/dedup_exchange_rate_scd.sql similarity index 52% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization_spffv/dedup_exchange_rate_scd.sql index a193db25eb236..d9f559ef203d3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization_spffv/dedup_exchange_rate_scd.sql @@ -1,13 +1,13 @@ - delete from "integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" + delete from "normalization_tests".test_normalization_spffv."dedup_exchange_rate_scd" where (_airbyte_unique_key_scd) in ( select (_airbyte_unique_key_scd) from "dedup_exchange_rate_scd__dbt_tmp" ); - insert into "integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", 
"currency", "new_column", "date", "timestamp_col", "hkd@spéçiäl & characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + insert into "normalization_tests".test_normalization_spffv."dedup_exchange_rate_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "new_column", "date", "timestamp_col", "hkd@spéçiäl & characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") ( select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "new_column", "date", "timestamp_col", "hkd@spéçiäl & characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" from "dedup_exchange_rate_scd__dbt_tmp" diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization_vorny/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization_vorny/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..e376359795385 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization_vorny/dedup_exchange_rate_scd.sql @@ -0,0 +1,15 @@ + + + delete from "normalization_tests".test_normalization_vorny."dedup_exchange_rate_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "dedup_exchange_rate_scd__dbt_tmp" + ); + + + insert into 
"normalization_tests".test_normalization_vorny."dedup_exchange_rate_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "new_column", "date", "timestamp_col", "hkd@spéçiäl & characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "new_column", "date", "timestamp_col", "hkd@spéçiäl & characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from "dedup_exchange_rate_scd__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/test_normalization_spffv/dedup_exchange_rate.sql similarity index 52% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/test_normalization_spffv/dedup_exchange_rate.sql index 6afa610cc7215..2d11459f8b4b5 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/test_normalization_spffv/dedup_exchange_rate.sql @@ -1,13 +1,13 @@ - delete from "integrationtests".test_normalization_bhhpj."dedup_exchange_rate" + delete from "normalization_tests".test_normalization_spffv."dedup_exchange_rate" where (_airbyte_unique_key) in ( select (_airbyte_unique_key) from "dedup_exchange_rate__dbt_tmp" ); - insert into "integrationtests".test_normalization_bhhpj."dedup_exchange_rate" ("_airbyte_unique_key", "id", "currency", "new_column", "date", "timestamp_col", "hkd@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + insert into "normalization_tests".test_normalization_spffv."dedup_exchange_rate" ("_airbyte_unique_key", "id", "currency", "new_column", "date", "timestamp_col", "hkd@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") ( select "_airbyte_unique_key", "id", "currency", "new_column", "date", "timestamp_col", "hkd@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" from "dedup_exchange_rate__dbt_tmp" diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/test_normalization_vorny/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/test_normalization_vorny/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..1f35d7840bee5 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/test_normalization_vorny/dedup_exchange_rate.sql @@ -0,0 +1,15 @@ + + + delete from "normalization_tests".test_normalization_vorny."dedup_exchange_rate" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "dedup_exchange_rate__dbt_tmp" + ); + + + insert into "normalization_tests".test_normalization_vorny."dedup_exchange_rate" ("_airbyte_unique_key", "id", "currency", "new_column", "date", "timestamp_col", "hkd@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + ( + select "_airbyte_unique_key", "id", "currency", "new_column", "date", "timestamp_col", "hkd@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from "dedup_exchange_rate__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/test_normalization_spffv/exchange_rate.sql similarity index 89% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/test_normalization_spffv/exchange_rate.sql index 031baa2a7efbe..c8e0c37d6d6e4 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/test_normalization_spffv/exchange_rate.sql @@ -1,7 +1,7 @@ create table - "integrationtests".test_normalization_bhhpj."exchange_rate__dbt_tmp" + "normalization_tests".test_normalization_spffv."exchange_rate__dbt_tmp" compound sortkey(_airbyte_emitted_at) @@ -11,7 +11,7 @@ with __dbt__cte__exchange_rate_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate +-- depends_on: "normalization_tests".test_normalization_spffv._airbyte_raw_exchange_rate select case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, @@ -25,7 +25,7 @@ select _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate as table_alias +from "normalization_tests".test_normalization_spffv._airbyte_raw_exchange_rate as table_alias -- exchange_rate where 1 = 1 ), __dbt__cte__exchange_rate_ab2 as ( @@ -44,7 +44,7 @@ select date ) as date, cast(nullif(timestamp_col::varchar, '') as - timestamp with time zone + TIMESTAMPTZ ) as timestamp_col, cast("hkd@spéçiäl & characters" as float @@ -89,6 +89,6 @@ select getdate() as _airbyte_normalized_at, _airbyte_exchange_rate_hashid from __dbt__cte__exchange_rate_ab3 --- exchange_rate from "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate +-- exchange_rate from "normalization_tests".test_normalization_spffv._airbyte_raw_exchange_rate where 1 = 1 ); \ 
No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/test_normalization_vorny/exchange_rate.sql similarity index 71% rename from airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/test_normalization_vorny/exchange_rate.sql index e2bd3830cb423..dfa229adebae8 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/test_normalization_vorny/exchange_rate.sql @@ -1,7 +1,7 @@ create table - "integrationtests".test_normalization_bhhpj."exchange_rate__dbt_tmp" + "normalization_tests".test_normalization_vorny."exchange_rate__dbt_tmp" compound sortkey(_airbyte_emitted_at) @@ -11,21 +11,21 @@ with __dbt__cte__exchange_rate_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate +-- depends_on: "normalization_tests".test_normalization_vorny._airbyte_raw_exchange_rate select case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as 
currency, + case when _airbyte_data."new_column" != '' then _airbyte_data."new_column" end as new_column, case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", - case when _airbyte_data."HKD_special___characters" != '' then _airbyte_data."HKD_special___characters" end as hkd_special___characters, case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, case when _airbyte_data."column`_'with""_quotes" != '' then _airbyte_data."column`_'with""_quotes" end as "column`_'with""_quotes", _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate as table_alias +from "normalization_tests".test_normalization_vorny._airbyte_raw_exchange_rate as table_alias -- exchange_rate where 1 = 1 ), __dbt__cte__exchange_rate_ab2 as ( @@ -34,19 +34,21 @@ where 1 = 1 -- depends_on: __dbt__cte__exchange_rate_ab1 select cast(id as - bigint + float ) as id, cast(currency as text) as currency, + cast(new_column as + float +) as new_column, cast(nullif(date::varchar, '') as date ) as date, cast(nullif(timestamp_col::varchar, '') as - timestamp with time zone + TIMESTAMPTZ ) as timestamp_col, cast("hkd@spéçiäl & characters" as float ) as "hkd@spéçiäl & characters", - cast(hkd_special___characters as text) as hkd_special___characters, cast(nzd as float ) as nzd, @@ -65,7 +67,7 @@ where 1 = 1 -- SQL model to build a hash column based on the values of this record -- depends_on: __dbt__cte__exchange_rate_ab2 select - md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || 
coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_exchange_rate_hashid, + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(new_column as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_exchange_rate_hashid, tmp.* from __dbt__cte__exchange_rate_ab2 tmp -- exchange_rate @@ -75,10 +77,10 @@ where 1 = 1 select id, currency, + new_column, date, timestamp_col, "hkd@spéçiäl & characters", - hkd_special___characters, nzd, usd, "column`_'with""_quotes", @@ -87,6 +89,6 @@ select getdate() as _airbyte_normalized_at, _airbyte_exchange_rate_hashid from __dbt__cte__exchange_rate_ab3 --- exchange_rate from "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate +-- exchange_rate from "normalization_tests".test_normalization_vorny._airbyte_raw_exchange_rate where 1 = 1 ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization_spffv/dedup_exchange_rate_stg.sql similarity index 88% rename from 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql rename to airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization_spffv/dedup_exchange_rate_stg.sql index 8c9d36dd07d19..509c38a1c7502 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization_spffv/dedup_exchange_rate_stg.sql @@ -1,11 +1,11 @@ - create view "integrationtests"._airbyte_test_normalization_bhhpj."dedup_exchange_rate_stg__dbt_tmp" as ( + create view "normalization_tests"._airbyte_test_normalization_spffv."dedup_exchange_rate_stg__dbt_tmp" as ( with __dbt__cte__dedup_exchange_rate_ab1 as ( -- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema --- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate +-- depends_on: "normalization_tests".test_normalization_spffv._airbyte_raw_dedup_exchange_rate select case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, @@ -18,7 +18,7 @@ select _airbyte_ab_id, _airbyte_emitted_at, getdate() as _airbyte_normalized_at -from "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate as table_alias +from "normalization_tests".test_normalization_spffv._airbyte_raw_dedup_exchange_rate as table_alias -- dedup_exchange_rate where 1 = 1 @@ -38,7 +38,7 @@ select date ) as date, 
cast(nullif(timestamp_col::varchar, '') as - timestamp with time zone + TIMESTAMPTZ ) as timestamp_col, cast("hkd@spéçiäl & characters" as float diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization_vorny/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization_vorny/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..545ff13328cf6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization_vorny/dedup_exchange_rate_stg.sql @@ -0,0 +1,68 @@ + + + create view "normalization_tests"._airbyte_test_normalization_vorny."dedup_exchange_rate_stg__dbt_tmp" as ( + +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "normalization_tests".test_normalization_vorny._airbyte_raw_dedup_exchange_rate +select + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, + case when _airbyte_data."new_column" != '' then _airbyte_data."new_column" end as new_column, + case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, + case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, + case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", + case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, + case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, + _airbyte_ab_id, 
+ _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from "normalization_tests".test_normalization_vorny._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__dedup_exchange_rate_ab1 +select + cast(id as + float +) as id, + cast(currency as text) as currency, + cast(new_column as + float +) as new_column, + cast(nullif(date::varchar, '') as + date +) as date, + cast(nullif(timestamp_col::varchar, '') as + TIMESTAMPTZ +) as timestamp_col, + cast("hkd@spéçiäl & characters" as + float +) as "hkd@spéçiäl & characters", + cast(nzd as + float +) as nzd, + cast(usd as + bigint +) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__dedup_exchange_rate_ab2 +select + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(new_column as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') as text)) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from __dbt__cte__dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 + + ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql index 7b46e390d0575..a8c3a29ba971e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql @@ -33,12 +33,12 @@ from ( select distinct _AIRBYTE_UNIQUE_KEY as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} + where 1=1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', adapter.quote(this.schema) + '.' + adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} ) recent_records left join ( select _AIRBYTE_UNIQUE_KEY as unique_key, count(_AIRBYTE_UNIQUE_KEY) as active_count from {{ this }} - where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} + where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', adapter.quote(this.schema) + '.' 
+ adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} group by _AIRBYTE_UNIQUE_KEY ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql index e35addfdeb762..38666ffd4727c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql @@ -21,6 +21,7 @@ select to_varchar(get_path(parse_json(_airbyte_data), '"datetime_no_tz"')) as DATETIME_NO_TZ, to_varchar(get_path(parse_json(_airbyte_data), '"time_tz"')) as TIME_TZ, to_varchar(get_path(parse_json(_airbyte_data), '"time_no_tz"')) as TIME_NO_TZ, + to_varchar(get_path(parse_json(_airbyte_data), '"property_binary_data"')) as PROPERTY_BINARY_DATA, _AIRBYTE_AB_ID, _AIRBYTE_EMITTED_AT, convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT @@ -87,6 +88,7 @@ select cast(nullif(TIME_NO_TZ, '') as time ) as TIME_NO_TZ, + cast(BASE64_DECODE_BINARY(PROPERTY_BINARY_DATA) as VARBINARY) as PROPERTY_BINARY_DATA, _AIRBYTE_AB_ID, _AIRBYTE_EMITTED_AT, convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT @@ -124,6 +126,8 @@ select varchar ), '') || '-' || coalesce(cast(TIME_NO_TZ as varchar +), '') || '-' || coalesce(cast(PROPERTY_BINARY_DATA as + varchar ), '') as varchar )) as _AIRBYTE_EXCHANGE_RATE_HASHID, @@ -147,6 +151,7 @@ select DATETIME_NO_TZ, 
TIME_TZ, TIME_NO_TZ, + PROPERTY_BINARY_DATA, _AIRBYTE_AB_ID, _AIRBYTE_EMITTED_AT, convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql index 13f4936015110..85663304b4409 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql @@ -33,12 +33,12 @@ from ( select distinct _AIRBYTE_UNIQUE_KEY as unique_key from {{ this }} - where 1=1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('DEDUP_EXCHANGE_RATE')) }} + where 1=1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', adapter.quote(this.schema) + '.' + adapter.quote('DEDUP_EXCHANGE_RATE')) }} ) recent_records left join ( select _AIRBYTE_UNIQUE_KEY as unique_key, count(_AIRBYTE_UNIQUE_KEY) as active_count from {{ this }} - where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('DEDUP_EXCHANGE_RATE')) }} + where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', adapter.quote(this.schema) + '.' 
+ adapter.quote('DEDUP_EXCHANGE_RATE')) }} group by _AIRBYTE_UNIQUE_KEY ) active_counts on recent_records.unique_key = active_counts.unique_key diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql index 6b42adb3962da..60148a141461f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql @@ -2,6 +2,22 @@ cluster_by = ["_AIRBYTE_EMITTED_AT"], unique_key = '_AIRBYTE_AB_ID', schema = "TEST_NORMALIZATION", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='EXCHANGE_RATE_SCD' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], tags = [ "top-level" ] ) }} -- Final base SQL model @@ -20,6 +36,7 @@ select DATETIME_NO_TZ, TIME_TZ, TIME_NO_TZ, + PROPERTY_BINARY_DATA, _AIRBYTE_AB_ID, _AIRBYTE_EMITTED_AT, {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql index 
e35addfdeb762..38666ffd4727c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql @@ -21,6 +21,7 @@ select to_varchar(get_path(parse_json(_airbyte_data), '"datetime_no_tz"')) as DATETIME_NO_TZ, to_varchar(get_path(parse_json(_airbyte_data), '"time_tz"')) as TIME_TZ, to_varchar(get_path(parse_json(_airbyte_data), '"time_no_tz"')) as TIME_NO_TZ, + to_varchar(get_path(parse_json(_airbyte_data), '"property_binary_data"')) as PROPERTY_BINARY_DATA, _AIRBYTE_AB_ID, _AIRBYTE_EMITTED_AT, convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT @@ -87,6 +88,7 @@ select cast(nullif(TIME_NO_TZ, '') as time ) as TIME_NO_TZ, + cast(BASE64_DECODE_BINARY(PROPERTY_BINARY_DATA) as VARBINARY) as PROPERTY_BINARY_DATA, _AIRBYTE_AB_ID, _AIRBYTE_EMITTED_AT, convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT @@ -124,6 +126,8 @@ select varchar ), '') || '-' || coalesce(cast(TIME_NO_TZ as varchar +), '') || '-' || coalesce(cast(PROPERTY_BINARY_DATA as + varchar ), '') as varchar )) as _AIRBYTE_EXCHANGE_RATE_HASHID, @@ -147,6 +151,7 @@ select DATETIME_NO_TZ, TIME_TZ, TIME_NO_TZ, + PROPERTY_BINARY_DATA, _AIRBYTE_AB_ID, _AIRBYTE_EMITTED_AT, convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/dbt_project.yml new file mode 100755 index 0000000000000..b43b3dad276e3 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/dbt_project.yml @@ -0,0 +1,121 @@ +name: airbyte_utils +version: '1.0' +config-version: 2 +profile: normalize +model-paths: +- models +docs-paths: +- docs +analysis-paths: +- analysis +test-paths: +- tests +seed-paths: +- data +macro-paths: +- macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: +- build +- dbt_modules +quoting: + database: true + schema: false + identifier: true +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view +vars: + dbt_utils_dispatch_list: + - airbyte_utils + json_column: _airbyte_data + models_to_source: + nested_stream_with_co_1g_into_long_names_ab1: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_1g_into_long_names_ab2: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_1g_into_long_names_stg: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_1g_into_long_names_scd: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co__lting_into_long_names: test_normalization._airbyte_raw_nested_s__lting_into_long_names + non_nested_stream_wit_1g_into_long_names_ab1: test_normalization._airbyte_raw_non_nest__lting_into_long_names + non_nested_stream_wit_1g_into_long_names_ab2: test_normalization._airbyte_raw_non_nest__lting_into_long_names + non_nested_stream_wit_1g_into_long_names_ab3: test_normalization._airbyte_raw_non_nest__lting_into_long_names + non_nested_stream_wit__lting_into_long_names: 
test_normalization._airbyte_raw_non_nest__lting_into_long_names + some_stream_that_was_empty_ab1: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_ab2: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_stg: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_scd: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty: test_normalization._airbyte_raw_some_stream_that_was_empty + simple_stream_with_na_1g_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_s__lting_into_long_names + simple_stream_with_na_1g_into_long_names_ab2: test_normalization_namespace._airbyte_raw_simple_s__lting_into_long_names + simple_stream_with_na_1g_into_long_names_ab3: test_normalization_namespace._airbyte_raw_simple_s__lting_into_long_names + simple_stream_with_na__lting_into_long_names: test_normalization_namespace._airbyte_raw_simple_s__lting_into_long_names + conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_scalar_ab1: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab2: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab3: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_array_ab1: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array_ab2: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array_ab3: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array: test_normalization._airbyte_raw_conflict_stream_array + 
unnest_alias_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias: test_normalization._airbyte_raw_unnest_alias + arrays_ab1: test_normalization._airbyte_raw_arrays + arrays_ab2: test_normalization._airbyte_raw_arrays + arrays_ab3: test_normalization._airbyte_raw_arrays + arrays: test_normalization._airbyte_raw_arrays + nested_stream_with_co_2g_names_partition_ab1: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_2g_names_partition_ab2: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_2g_names_partition_ab3: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co___long_names_partition: test_normalization._airbyte_raw_nested_s__lting_into_long_names + conflict_stream_name__2flict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name__2flict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name__2flict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + unnest_alias_children_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children: test_normalization._airbyte_raw_unnest_alias + arrays_nested_array_parent_ab1: test_normalization._airbyte_raw_arrays + arrays_nested_array_parent_ab2: test_normalization._airbyte_raw_arrays + arrays_nested_array_parent_ab3: test_normalization._airbyte_raw_arrays + arrays_nested_array_parent: test_normalization._airbyte_raw_arrays + nested_stream_with_co_3double_array_data_ab1: 
test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_3double_array_data_ab2: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_3double_array_data_ab3: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co__ion_double_array_data: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_3es_partition_data_ab1: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_3es_partition_data_ab2: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_3es_partition_data_ab3: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co___names_partition_data: test_normalization._airbyte_raw_nested_s__lting_into_long_names + conflict_stream_name__3flict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name__3flict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name__3flict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name____conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + unnest_alias_children_owner_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_4mn___with__quotes_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_4mn___with__quotes_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_4mn___with__quotes_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children__column___with__quotes: test_normalization._airbyte_raw_unnest_alias diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql new file mode 100644 index 0000000000000..74ac8045f1caa --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql new file mode 100644 index 0000000000000..74ac8045f1caa --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql new file mode 100644 index 0000000000000..74ac8045f1caa --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql new file mode 100644 index 0000000000000..74ac8045f1caa --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql new file mode 100644 index 0000000000000..74ac8045f1caa --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab1.sql new file mode 100644 index 0000000000000..d638e7a898ff3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab1.sql @@ -0,0 +1,19 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, + {{ json_extract('table_alias', '_airbyte_data', ['partition'], ['partition']) }} as {{ adapter.quote('partition') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} as table_alias +-- nested_stream_with_co__lting_into_long_names +where 1 = 1 +{{ 
incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab2.sql new file mode 100644 index 0000000000000..b688e0746faaf --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab2.sql @@ -0,0 +1,19 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('nested_stream_with_co_1g_into_long_names_ab1') }} +select + cast(id as {{ dbt_utils.type_string() }}) as id, + cast({{ adapter.quote('date') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('date') }}, + cast({{ adapter.quote('partition') }} as {{ type_json() }}) as {{ adapter.quote('partition') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_co_1g_into_long_names_ab1') }} +-- nested_stream_with_co__lting_into_long_names +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_2g_names_partition_ab1.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_2g_names_partition_ab1.sql new file mode 100644 index 0000000000000..427a929211b27 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_2g_names_partition_ab1.sql @@ -0,0 +1,19 @@ +{{ config( + schema = "_airbyte_test_normalization", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ ref('nested_stream_with_co_1g_into_long_names_scd') }} +select + _airbyte_nested_strea__nto_long_names_hashid, + {{ json_extract_array(adapter.quote('partition'), ['double_array_data'], ['double_array_data']) }} as double_array_data, + {{ json_extract_array(adapter.quote('partition'), ['DATA'], ['DATA']) }} as {{ adapter.quote('DATA') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_co_1g_into_long_names_scd') }} as table_alias +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 +and {{ adapter.quote('partition') }} is not null +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3double_array_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3double_array_data_ab1.sql new file mode 100644 index 0000000000000..a8ca4bbb7d40f --- 
/dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3double_array_data_ab1.sql @@ -0,0 +1,20 @@ +{{ config( + schema = "_airbyte_test_normalization", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ ref('nested_stream_with_co___long_names_partition') }} +{{ unnest_cte(ref('nested_stream_with_co___long_names_partition'), 'partition', 'double_array_data') }} +select + _airbyte_partition_hashid, + {{ json_extract_scalar(unnested_column_value('double_array_data'), ['id'], ['id']) }} as id, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_co___long_names_partition') }} as table_alias +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +{{ cross_join_unnest('partition', 'double_array_data') }} +where 1 = 1 +and double_array_data is not null +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3es_partition_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3es_partition_data_ab1.sql new file mode 100644 index 0000000000000..cdf1151ee10d7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3es_partition_data_ab1.sql @@ -0,0 +1,20 @@ +{{ config( + 
schema = "_airbyte_test_normalization", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ ref('nested_stream_with_co___long_names_partition') }} +{{ unnest_cte(ref('nested_stream_with_co___long_names_partition'), 'partition', adapter.quote('DATA')) }} +select + _airbyte_partition_hashid, + {{ json_extract_scalar(unnested_column_value(adapter.quote('DATA')), ['currency'], ['currency']) }} as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_co___long_names_partition') }} as table_alias +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +{{ cross_join_unnest('partition', adapter.quote('DATA')) }} +where 1 = 1 +and {{ adapter.quote('DATA') }} is not null +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql new file mode 100644 index 0000000000000..f4b9f7bb2f074 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -0,0 +1,162 @@ +{{ config( + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + 
identifier='nested_stream_with_co__lting_into_long_names' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.nested_stream_with_co_1g_into_long_names_stg"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('nested_stream_with_co_1g_into_long_names_stg') }} + -- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('nested_stream_with_co_1g_into_long_names_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), 
+input_data as ( + select {{ dbt_utils.star(ref('nested_stream_with_co_1g_into_long_names_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('nested_stream_with_co_1g_into_long_names_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('nested_stream_with_co_1g_into_long_names_stg') }} + -- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key, + id, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, + {{ adapter.quote('date') }} as _airbyte_start_at, + lag({{ adapter.quote('date') }}) over ( + partition by id + order by + {{ adapter.quote('date') }} is null asc, + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id + order by + {{ adapter.quote('date') }} is null asc, + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_nested_strea__nto_long_names_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + {{ adapter.quote('date') }}, + {{ 
adapter.quote('partition') }}, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_nested_strea__nto_long_names_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql new file mode 100644 index 0000000000000..0c8adc779de9f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql @@ -0,0 +1,19 @@ +{{ config( + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_co_2g_names_partition_ab3') }} +select + _airbyte_nested_strea__nto_long_names_hashid, + double_array_data, + {{ adapter.quote('DATA') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_partition_hashid +from {{ ref('nested_stream_with_co_2g_names_partition_ab3') }} +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ ref('nested_stream_with_co_1g_into_long_names_scd') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql new file mode 100644 index 0000000000000..92e44abc92988 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql @@ -0,0 +1,18 @@ +{{ config( + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_co_3es_partition_data_ab3') }} +select + _airbyte_partition_hashid, + currency, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_data_hashid +from {{ ref('nested_stream_with_co_3es_partition_data_ab3') }} +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('nested_stream_with_co___long_names_partition') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql new file mode 100644 index 0000000000000..6a17d6258b3e6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql @@ -0,0 +1,18 @@ +{{ config( + schema = "test_normalization", + tags = [ "nested" ] +) 
}} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_co_3double_array_data_ab3') }} +select + _airbyte_partition_hashid, + id, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_double_array_data_hashid +from {{ ref('nested_stream_with_co_3double_array_data_ab3') }} +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('nested_stream_with_co___long_names_partition') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql new file mode 100644 index 0000000000000..0ea84390902e9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql @@ -0,0 +1,22 @@ +{{ config( + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_co_1g_into_long_names_scd') }} +select + _airbyte_unique_key, + id, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_nested_strea__nto_long_names_hashid +from {{ ref('nested_stream_with_co_1g_into_long_names_scd') }} +-- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', 
'_airbyte_raw_nested_s__lting_into_long_names') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/sources.yml new file mode 100644 index 0000000000000..50def309c8c44 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/models/generated/sources.yml @@ -0,0 +1,23 @@ +version: 2 +sources: +- name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_arrays + - name: _airbyte_raw_conflict_stream_array + - name: _airbyte_raw_conflict_stream_name + - name: _airbyte_raw_conflict_stream_scalar + - name: _airbyte_raw_nested_s__lting_into_long_names + - name: _airbyte_raw_non_nest__lting_into_long_names + - name: _airbyte_raw_some_stream_that_was_empty + - name: _airbyte_raw_unnest_alias +- name: test_normalization_namespace + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_simple_s__lting_into_long_names diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql new file mode 100644 index 0000000000000..e755a1afc1938 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -0,0 +1,7 @@ + + insert into test_normalization.`nested_stream_with_co_1g_into_long_names_scd` (`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `date`, `partition`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_strea__nto_long_names_hashid`) + ( + select `_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `date`, `partition`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_strea__nto_long_names_hashid` + from test_normalization.`nested_stream_with_co_1g_into_long_names_scd__dbt_tmp` + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql new file mode 100644 index 0000000000000..81c83857e4f85 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql @@ -0,0 +1,7 @@ + + insert into test_normalization.`nested_stream_with_co___long_names_partition` (`_airbyte_nested_strea__nto_long_names_hashid`, `double_array_data`, `DATA`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_partition_hashid`) + ( + select 
`_airbyte_nested_strea__nto_long_names_hashid`, `double_array_data`, `DATA`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_partition_hashid` + from test_normalization.`nested_stream_with_co___long_names_partition__dbt_tmp` + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql new file mode 100644 index 0000000000000..4aefae0a0267d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql @@ -0,0 +1,7 @@ + + insert into test_normalization.`nested_stream_with_co___names_partition_data` (`_airbyte_partition_hashid`, `currency`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_data_hashid`) + ( + select `_airbyte_partition_hashid`, `currency`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_data_hashid` + from test_normalization.`nested_stream_with_co___names_partition_data__dbt_tmp` + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql new file mode 100644 index 
0000000000000..48aabbfa4a771 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql @@ -0,0 +1,7 @@ + + insert into test_normalization.`nested_stream_with_co__ion_double_array_data` (`_airbyte_partition_hashid`, `id`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_double_array_data_hashid`) + ( + select `_airbyte_partition_hashid`, `id`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_double_array_data_hashid` + from test_normalization.`nested_stream_with_co__ion_double_array_data__dbt_tmp` + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql new file mode 100644 index 0000000000000..331e7b37cd990 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql @@ -0,0 +1,7 @@ + + insert into test_normalization.`nested_stream_with_co__lting_into_long_names` (`_airbyte_unique_key`, `id`, `date`, `partition`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_strea__nto_long_names_hashid`) + ( + select `_airbyte_unique_key`, `id`, `date`, `partition`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_strea__nto_long_names_hashid` + from 
test_normalization.`nested_stream_with_co__lting_into_long_names__dbt_tmp` + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/dbt_project.yml new file mode 100755 index 0000000000000..7321f340e62ca --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/dbt_project.yml @@ -0,0 +1,86 @@ +name: airbyte_utils +version: '1.0' +config-version: 2 +profile: normalize +model-paths: +- models +docs-paths: +- docs +analysis-paths: +- analysis +test-paths: +- tests +seed-paths: +- data +macro-paths: +- macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: +- build +- dbt_modules +quoting: + database: true + schema: false + identifier: true +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view +vars: + dbt_utils_dispatch_list: + - airbyte_utils + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate + exchange_rate: test_normalization._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: 
test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded + pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx + 1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number + multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts + 
multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts + types_testing_ab1: test_normalization._airbyte_raw_types_testing + types_testing_ab2: test_normalization._airbyte_raw_types_testing + types_testing_stg: test_normalization._airbyte_raw_types_testing + types_testing_scd: test_normalization._airbyte_raw_types_testing + types_testing: test_normalization._airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..74ac8045f1caa --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..74ac8045f1caa --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..2dd5d71c8738c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..f32d666a86301 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,112 @@ + + create view _airbyte_test_normalization.`dedup_exchange_rate_stg__dbt_tmp` as ( + +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization._airbyte_raw_dedup_exchange_rate +select + IF( + 
JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."id"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."id"')) + ) as id, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."currency"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."currency"')) + ) as currency, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."date"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."date"')) + ) as `date`, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."timestamp_col"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."timestamp_col"')) + ) as timestamp_col, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."HKD@spéçiäl & characters"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."HKD@spéçiäl & characters"')) + ) as `HKD@spéçiäl & characters`, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."HKD_special___characters"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."HKD_special___characters"')) + ) as hkd_special___characters, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."NZD"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."NZD"')) + ) as nzd, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."USD"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."USD"')) + ) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + current_timestamp() as _airbyte_normalized_at +from test_normalization._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__dedup_exchange_rate_ab1 +select + cast(id as + signed +) as id, + cast(currency as char(1000)) as currency, + case when `date` = '' then NULL + else cast(`date` as date) + end as `date` + , + cast(nullif(timestamp_col, '') as char(1000)) as timestamp_col, + 
cast(`HKD@spéçiäl & characters` as + float +) as `HKD@spéçiäl & characters`, + cast(hkd_special___characters as char(1000)) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + current_timestamp() as _airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__dedup_exchange_rate_ab2 +select + md5(cast(concat(coalesce(cast(id as char(1000)), ''), '-', coalesce(cast(currency as char(1000)), ''), '-', coalesce(cast(`date` as char(1000)), ''), '-', coalesce(cast(timestamp_col as char(1000)), ''), '-', coalesce(cast(`HKD@spéçiäl & characters` as char(1000)), ''), '-', coalesce(cast(hkd_special___characters as char(1000)), ''), '-', coalesce(cast(nzd as char(1000)), ''), '-', coalesce(cast(usd as char(1000)), '')) as char(1000))) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from __dbt__cte__dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 + + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..f1e29571d47bd --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,103 @@ + + create view _airbyte_test_normalization.`multiple_column_names_conflicts_stg__dbt_tmp` as ( + +with __dbt__cte__multiple_column_names_conflicts_ab1 as ( 
+ +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization._airbyte_raw_multiple_column_names_conflicts +select + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."id"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."id"')) + ) as id, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."User Id"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."User Id"')) + ) as `User Id`, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."user_id"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."user_id"')) + ) as user_id, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."User id"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."User id"')) + ) as `User id_1`, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."user id"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."user id"')) + ) as `user id_2`, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."User@Id"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."User@Id"')) + ) as `User@Id`, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."UserId"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."UserId"')) + ) as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + current_timestamp() as _airbyte_normalized_at +from test_normalization._airbyte_raw_multiple_column_names_conflicts as table_alias +-- multiple_column_names_conflicts +where 1 = 1 + +), __dbt__cte__multiple_column_names_conflicts_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1 +select + cast(id as + signed +) as id, + cast(`User Id` as char(1000)) as `User Id`, + cast(user_id as + float +) as user_id, + cast(`User id_1` as + float +) as `User id_1`, + cast(`user id_2` as + float 
+) as `user id_2`, + cast(`User@Id` as char(1000)) as `User@Id`, + cast(userid as + float +) as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + current_timestamp() as _airbyte_normalized_at +from __dbt__cte__multiple_column_names_conflicts_ab1 +-- multiple_column_names_conflicts +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2 +select + md5(cast(concat(coalesce(cast(id as char(1000)), ''), '-', coalesce(cast(`User Id` as char(1000)), ''), '-', coalesce(cast(user_id as char(1000)), ''), '-', coalesce(cast(`User id_1` as char(1000)), ''), '-', coalesce(cast(`user id_2` as char(1000)), ''), '-', coalesce(cast(`User@Id` as char(1000)), ''), '-', coalesce(cast(userid as char(1000)), '')) as char(1000))) as _airbyte_multiple_col__ames_conflicts_hashid, + tmp.* +from __dbt__cte__multiple_column_names_conflicts_ab2 tmp +-- multiple_column_names_conflicts +where 1 = 1 + + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql new file mode 100644 index 0000000000000..670db0869ae22 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -0,0 +1,24 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ 
source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, + {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, + {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('HKD@spéçiäl & characters') }}, + {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as hkd_special___characters, + {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, + {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql new file mode 100644 index 0000000000000..8c26a8139c260 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -0,0 +1,27 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its 
adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('dedup_exchange_rate_ab1') }} +select + cast(id as {{ dbt_utils.type_bigint() }}) as id, + cast(currency as {{ dbt_utils.type_string() }}) as currency, + case when {{ adapter.quote('date') }} = '' then NULL + else cast({{ adapter.quote('date') }} as date) + end as {{ adapter.quote('date') }} + , + cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, + cast({{ adapter.quote('HKD@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('HKD@spéçiäl & characters') }}, + cast(hkd_special___characters as {{ dbt_utils.type_string() }}) as hkd_special___characters, + cast(nzd as {{ dbt_utils.type_float() }}) as nzd, + cast(usd as {{ dbt_utils.type_float() }}) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('dedup_exchange_rate_ab1') }} +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..499ae700143f8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,176 @@ +{{ config( + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + 
identifier='dedup_exchange_rate' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' + adapter.quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', adapter.quote(this.schema) + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + union all + select {{ 
dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key, + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + {{ adapter.quote('date') }} as _airbyte_start_at, + lag({{ adapter.quote('date') }}) over ( + partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) + order by + {{ adapter.quote('date') }} is null asc, + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) + order by + {{ adapter.quote('date') }} is null asc, + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ 
adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..dd4432bd60a5e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,27 @@ +{{ config( + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('dedup_exchange_rate_scd') }} +select + _airbyte_unique_key, + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from {{ ref('dedup_exchange_rate_scd') }} +-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..bc56f1c5a3cc9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,46 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "test_normalization", + post_hook = [" + {% + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='exchange_rate_scd' + ) + %} + {% + if scd_table_relation is not none + %} + {% + do adapter.drop_relation(scd_table_relation) + %} + {% endif %} + "], + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('exchange_rate_ab3') }} +select + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + {{ adapter.quote('column__\'with"_quotes') }}, + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + property_binary_data, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from {{ ref('exchange_rate_ab3') }} +-- exchange_rate from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..86ec2c9e8b1b7 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,24 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('dedup_exchange_rate_ab2') }} +select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + adapter.quote('date'), + 'timestamp_col', + adapter.quote('HKD@spéçiäl & characters'), + 'hkd_special___characters', + 'nzd', + 'usd', + ]) }} as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from {{ ref('dedup_exchange_rate_ab2') }} tmp +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/sources.yml new file mode 100644 index 0000000000000..f51802427655e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/models/generated/sources.yml @@ -0,0 +1,16 @@ +version: 2 +sources: +- name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_1_prefix_startwith_number + - name: _airbyte_raw_dedup_cdc_excluded + - name: _airbyte_raw_dedup_exchange_rate + - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_multiple_column_names_conflicts + - name: _airbyte_raw_pos_dedup_cdcx + - name: _airbyte_raw_renamed_dedup_cdc_excluded + - name: _airbyte_raw_types_testing diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..77b40906d7df9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,7 @@ + + insert into test_normalization.`dedup_exchange_rate_scd` (`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `currency`, `date`, `timestamp_col`, `HKD@spéçiäl & characters`, `hkd_special___characters`, `nzd`, `usd`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`) + ( + select `_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `currency`, `date`, `timestamp_col`, `HKD@spéçiäl & characters`, `hkd_special___characters`, `nzd`, `usd`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid` + from test_normalization.`dedup_exchange_rate_scd__dbt_tmp` + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..d67b7a41aba1b --- /dev/null 
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,7 @@ + + insert into test_normalization.`dedup_exchange_rate` (`_airbyte_unique_key`, `id`, `currency`, `date`, `timestamp_col`, `HKD@spéçiäl & characters`, `hkd_special___characters`, `nzd`, `usd`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`) + ( + select `_airbyte_unique_key`, `id`, `currency`, `date`, `timestamp_col`, `HKD@spéçiäl & characters`, `hkd_special___characters`, `nzd`, `usd`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid` + from test_normalization.`dedup_exchange_rate__dbt_tmp` + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..2dd5d71c8738c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..f32d666a86301 --- 
/dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/tidb/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,112 @@ + + create view _airbyte_test_normalization.`dedup_exchange_rate_stg__dbt_tmp` as ( + +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization._airbyte_raw_dedup_exchange_rate +select + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."id"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."id"')) + ) as id, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."currency"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."currency"')) + ) as currency, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."date"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."date"')) + ) as `date`, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."timestamp_col"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."timestamp_col"')) + ) as timestamp_col, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."HKD@spéçiäl & characters"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."HKD@spéçiäl & characters"')) + ) as `HKD@spéçiäl & characters`, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."HKD_special___characters"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."HKD_special___characters"')) + ) as hkd_special___characters, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."NZD"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."NZD"')) + ) as nzd, + IF( + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."USD"')) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT(_airbyte_data, + '$."USD"')) + ) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + 
current_timestamp() as _airbyte_normalized_at +from test_normalization._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__dedup_exchange_rate_ab1 +select + cast(id as + signed +) as id, + cast(currency as char(1000)) as currency, + case when `date` = '' then NULL + else cast(`date` as date) + end as `date` + , + cast(nullif(timestamp_col, '') as char(1000)) as timestamp_col, + cast(`HKD@spéçiäl & characters` as + float +) as `HKD@spéçiäl & characters`, + cast(hkd_special___characters as char(1000)) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + current_timestamp() as _airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__dedup_exchange_rate_ab2 +select + md5(cast(concat(coalesce(cast(id as char(1000)), ''), '-', coalesce(cast(currency as char(1000)), ''), '-', coalesce(cast(`date` as char(1000)), ''), '-', coalesce(cast(timestamp_col as char(1000)), ''), '-', coalesce(cast(`HKD@spéçiäl & characters` as char(1000)), ''), '-', coalesce(cast(hkd_special___characters as char(1000)), ''), '-', coalesce(cast(nzd as char(1000)), ''), '-', coalesce(cast(usd as char(1000)), '')) as char(1000))) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from __dbt__cte__dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 + + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/redshift_normalization_migration/destination_catalog.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/redshift_normalization_migration/destination_catalog.json 
index 11fcc6819fb1c..8fc21bb20955a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/redshift_normalization_migration/destination_catalog.json +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/redshift_normalization_migration/destination_catalog.json @@ -8,22 +8,22 @@ "$schema": "http://json-schema.org/draft-07/schema#", "properties": { "id": { - "type": ["null", "integer"] + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "name": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "is_default": { - "type": ["null", "boolean"] + "$ref": "WellKnownTypes.json#/definitions/Boolean" }, "species": { "type": ["null", "object"], "properties": { "name": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "url": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" } } }, @@ -33,10 +33,10 @@ "type": ["null", "object"], "properties": { "name": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "url": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" } } } diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/redshift_normalization_migration/messages1.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/redshift_normalization_migration/messages1.txt index 0238a28633164..f10511278564e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/redshift_normalization_migration/messages1.txt +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/redshift_normalization_migration/messages1.txt @@ -1,4 +1,4 @@ 
-{"type":"RECORD","record":{"stream":"pokemon","data":{"id":132,"name":"ditto","is_default":true,"species":{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-species/132/"},"forms":[{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-form/132/"}]},"emitted_at":1650266508770}} -{"type":"RECORD","record":{"stream":"pokemon","data":{"id":133,"name":"ditto","is_default":true,"species":{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-species/132/"},"forms":[{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-form/132/"}]},"emitted_at":1650266508771}} -{"type":"RECORD","record":{"stream":"pokemon","data":{"id":134,"name":"ditto","is_default":true,"species":{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-species/132/"},"forms":[{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-form/132/"}]},"emitted_at":1650266508772}} -{"type":"RECORD","record":{"stream":"pokemon","data":{"id":135,"name":"ditto","is_default":true,"species":{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-species/132/"},"forms":[{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-form/132/"}]},"emitted_at":1650266508773}} +{"type":"RECORD","record":{"stream":"pokemon","data":{"id":"132","name":"ditto","is_default":true,"species":{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-species/132/"},"forms":[{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-form/132/"}]},"emitted_at":1650266508770}} +{"type":"RECORD","record":{"stream":"pokemon","data":{"id":"133","name":"ditto","is_default":true,"species":{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-species/132/"},"forms":[{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-form/132/"}]},"emitted_at":1650266508771}} 
+{"type":"RECORD","record":{"stream":"pokemon","data":{"id":"134","name":"ditto","is_default":true,"species":{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-species/132/"},"forms":[{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-form/132/"}]},"emitted_at":1650266508772}} +{"type":"RECORD","record":{"stream":"pokemon","data":{"id":"135","name":"ditto","is_default":true,"species":{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-species/132/"},"forms":[{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-form/132/"}]},"emitted_at":1650266508773}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/redshift_normalization_migration/messages2.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/redshift_normalization_migration/messages2.txt index b5b4385b2d562..eed530ebc214c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/redshift_normalization_migration/messages2.txt +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/redshift_normalization_migration/messages2.txt @@ -1,4 +1,4 @@ -{"type":"RECORD","record":{"stream":"pokemon","data":{"id":132,"name":"ditto","is_default":true,"species":{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-species/132/"},"forms":[{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-form/132/"}]},"emitted_at":1650266508774}} -{"type":"RECORD","record":{"stream":"pokemon","data":{"id":133,"name":"ditto","is_default":true,"species":{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-species/132/"},"forms":[{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-form/132/"}]},"emitted_at":1650266508775}} 
-{"type":"RECORD","record":{"stream":"pokemon","data":{"id":134,"name":"ditto","is_default":true,"species":{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-species/132/"},"forms":[{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-form/132/"}]},"emitted_at":1650266508776}} -{"type":"RECORD","record":{"stream":"pokemon","data":{"id":135,"name":"ditto","is_default":true,"species":{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-species/132/"},"forms":[{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-form/132/"}]},"emitted_at":1650266508777}} +{"type":"RECORD","record":{"stream":"pokemon","data":{"id":"132","name":"ditto","is_default":true,"species":{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-species/132/"},"forms":[{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-form/132/"}]},"emitted_at":1650266508774}} +{"type":"RECORD","record":{"stream":"pokemon","data":{"id":"133","name":"ditto","is_default":true,"species":{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-species/132/"},"forms":[{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-form/132/"}]},"emitted_at":1650266508775}} +{"type":"RECORD","record":{"stream":"pokemon","data":{"id":"134","name":"ditto","is_default":true,"species":{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-species/132/"},"forms":[{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-form/132/"}]},"emitted_at":1650266508776}} +{"type":"RECORD","record":{"stream":"pokemon","data":{"id":"135","name":"ditto","is_default":true,"species":{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-species/132/"},"forms":[{"name":"ditto","url":"https://pokeapi.co/api/v2/pokemon-form/132/"}]},"emitted_at":1650266508777}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/catalog.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/catalog.json index 
4e5105f136e09..6119090daa1d6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/catalog.json +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/catalog.json @@ -7,10 +7,13 @@ "type": ["null", "object"], "properties": { "id": { - "type": ["null", "number", "string"] + "oneOf": [ + { "$ref": "WellKnownTypes.json#/definitions/Number" }, + { "$ref": "WellKnownTypes.json#/definitions/String" } + ] }, "date": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "partition": { "type": ["null", "object"], @@ -22,7 +25,7 @@ "items": { "properties": { "id": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" } } } @@ -33,7 +36,7 @@ "items": { "properties": { "currency": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" } } } @@ -58,10 +61,13 @@ "type": ["null", "object"], "properties": { "id": { - "type": ["null", "number", "string"] + "oneOf": [ + { "$ref": "WellKnownTypes.json#/definitions/Number" }, + { "$ref": "WellKnownTypes.json#/definitions/String" } + ] }, "date": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" } } }, @@ -80,10 +86,13 @@ "type": ["null", "object"], "properties": { "id": { - "type": ["null", "number", "string"] + "oneOf": [ + { "$ref": "WellKnownTypes.json#/definitions/Number" }, + { "$ref": "WellKnownTypes.json#/definitions/String" } + ] }, "date": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" } } }, @@ -104,10 +113,13 @@ "type": ["null", "object"], "properties": { "id": { - "type": ["null", "number", "string"] + "oneOf": [ + { "$ref": "WellKnownTypes.json#/definitions/Number" }, + { "$ref": "WellKnownTypes.json#/definitions/String" } + ] }, "date": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" } } }, @@ -126,7 +138,10 @@ 
"type": ["null", "object"], "properties": { "id": { - "type": ["null", "number", "string"] + "oneOf": [ + { "$ref": "WellKnownTypes.json#/definitions/Number" }, + { "$ref": "WellKnownTypes.json#/definitions/String" } + ] }, "conflict_stream_name": { "type": ["null", "object"], @@ -137,14 +152,14 @@ "type": "object", "properties": { "groups": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" } }, "custom_fields": { "items": { "properties": { "id": { - "type": ["null", "integer"] + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "value": {} }, @@ -153,7 +168,7 @@ "type": ["null", "array"] }, "conflict_stream_name": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" } } } @@ -176,10 +191,13 @@ "type": ["null", "object"], "properties": { "id": { - "type": ["null", "number", "string"] + "oneOf": [ + { "$ref": "WellKnownTypes.json#/definitions/Number" }, + { "$ref": "WellKnownTypes.json#/definitions/String" } + ] }, "conflict_stream_scalar": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" } } }, @@ -198,7 +216,10 @@ "type": ["null", "object"], "properties": { "id": { - "type": ["null", "number", "string"] + "oneOf": [ + { "$ref": "WellKnownTypes.json#/definitions/Number" }, + { "$ref": "WellKnownTypes.json#/definitions/String" } + ] }, "conflict_stream_array": { "type": ["null", "array"], @@ -208,7 +229,7 @@ "items": { "properties": { "id": { - "type": ["null", "integer"] + "$ref": "WellKnownTypes.json#/definitions/Integer" } } } @@ -232,7 +253,7 @@ "type": ["null", "object"], "properties": { "id": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "children": { "type": ["null", "array"], @@ -240,20 +261,20 @@ "type": "object", "properties": { "ab_id": { - "type": ["null", "integer"] + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "owner": { "type": ["null", "object"], "properties": { "owner_id": { - "type": ["null", "integer"] + "$ref": 
"WellKnownTypes.json#/definitions/Integer" }, "column`_'with\"_quotes": { "type": ["null", "array"], "items": { "properties": { "currency": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" } } } @@ -282,7 +303,7 @@ "array_of_strings": { "type": ["null", "array"], "items": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" } }, "nested_array_parent": { @@ -291,7 +312,7 @@ "nested_array": { "type": ["null", "array"], "items": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" } } } diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages.txt index e349c09afc31b..1c290c079a74c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages.txt +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages.txt @@ -1,18 +1,18 @@ -{"type": "RECORD", "record": {"stream": "nested_stream_with_complex_columns_resulting_into_long_names", "emitted_at": 1602638599000, "data": { "id": 4.2, "date": "2020-08-29T00:00:00Z", "partition": { "double_array_data": [[ { "id": "EUR" } ]], "DATA": [ {"currency": "EUR" } ], "column`_'with\"_quotes": [ {"currency": "EUR" } ] } }}} +{"type": "RECORD", "record": {"stream": "nested_stream_with_complex_columns_resulting_into_long_names", "emitted_at": 1602638599000, "data": { "id": "4.2", "date": "2020-08-29T00:00:00Z", "partition": { "double_array_data": [[ { "id": "EUR" } ]], "DATA": [ {"currency": "EUR" } ], "column`_'with\"_quotes": [ {"currency": "EUR" } ] } }}} {"type": "RECORD", "record": {"stream": "nested_stream_with_complex_columns_resulting_into_long_names", "emitted_at": 1602638599100, "data": { "id": "test record", "date": 
"2020-08-31T00:00:00Z", "partition": { "double_array_data": [[ { "id": "USD" } ], [ { "id": "GBP" } ]], "DATA": [ {"currency": "EUR" } ], "column`_'with\"_quotes": [ {"currency": "EUR" } ] } }}} -{"type":"RECORD","record":{"stream":"conflict_stream_name","data":{"id":1,"conflict_stream_name":{"conflict_stream_name": {"groups": "1", "custom_fields": [{"id":1, "value":3}, {"id":2, "value":4}], "conflict_stream_name": 3}}},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"conflict_stream_name","data":{"id":2,"conflict_stream_name":{"conflict_stream_name": {"groups": "2", "custom_fields": [{"id":1, "value":3}, {"id":2, "value":4}], "conflict_stream_name": 3}}},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_name","data":{"id":"1","conflict_stream_name":{"conflict_stream_name": {"groups": "1", "custom_fields": [{"id":"1", "value":"3"}, {"id":"2", "value":"4"}], "conflict_stream_name": "3"}}},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_name","data":{"id":"2","conflict_stream_name":{"conflict_stream_name": {"groups": "2", "custom_fields": [{"id":"1", "value":"3"}, {"id":"2", "value":"4"}], "conflict_stream_name": "3"}}},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":1,"conflict_stream_scalar": 2},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":2,"conflict_stream_scalar": 2},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":"1","conflict_stream_scalar": "2"},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":"2","conflict_stream_scalar": "2"},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"conflict_stream_array","data":{"id":1, "conflict_stream_array": {"conflict_stream_array": [{"id": 1}, {"id": 2}, {"id": 3}]}}, "emitted_at":1623861660}} 
-{"type":"RECORD","record":{"stream":"conflict_stream_array","data":{"id":2, "conflict_stream_array": {"conflict_stream_array": [{"id": 4}, {"id": 5}, {"id": 6}]}}, "emitted_at":1623861860}} +{"type":"RECORD","record":{"stream":"conflict_stream_array","data":{"id":"1", "conflict_stream_array": {"conflict_stream_array": [{"id": "1"}, {"id": "2"}, {"id": "3"}]}}, "emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_array","data":{"id":"2", "conflict_stream_array": {"conflict_stream_array": [{"id": "4"}, {"id": "5"}, {"id": "6"}]}}, "emitted_at":1623861860}} -{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":1,"conflict_stream_scalar": 2},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":2,"conflict_stream_scalar": 2},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":"1","conflict_stream_scalar": "2"},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":"2","conflict_stream_scalar": "2"},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"unnest_alias","data":{"id":1, "children": [{"ab_id": 1, "owner": {"owner_id": 1, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}},{"ab_id": 2, "owner": {"owner_id": 2, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}}]},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"unnest_alias","data":{"id":2, "children": [{"ab_id": 3, "owner": {"owner_id": 3, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}},{"ab_id": 4, "owner": {"owner_id": 4, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}}]},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"unnest_alias","data":{"id":"1", "children": [{"ab_id": "1", "owner": {"owner_id": "1", "column`_'with\"_quotes": [ {"currency": "EUR" } ]}},{"ab_id": "2", "owner": {"owner_id": "2", "column`_'with\"_quotes": [ {"currency": "EUR" } 
]}}]},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"unnest_alias","data":{"id":"2", "children": [{"ab_id": "3", "owner": {"owner_id": "3", "column`_'with\"_quotes": [ {"currency": "EUR" } ]}},{"ab_id": "4", "owner": {"owner_id": "4", "column`_'with\"_quotes": [ {"currency": "EUR" } ]}}]},"emitted_at":1623861660}} {"type":"RECORD","record":{"stream":"arrays","emitted_at":1602638599000,"data":{"array_of_strings":["string1",null,"string2","string3"],"nested_array_parent":{"nested_array":["string1",null,"string2"]}}}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages_incremental.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages_incremental.txt index ae1cf0f5c0b4e..c7b4bcfec5f74 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages_incremental.txt +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages_incremental.txt @@ -1,22 +1,22 @@ -{"type": "RECORD", "record": {"stream": "nested_stream_with_complex_columns_resulting_into_long_names", "emitted_at": 1602638599000, "data": { "id": 4.2, "date": "2020-08-29T00:00:00Z", "partition": { "double_array_data": [[ { "id": "EUR" } ]], "DATA": [ {"currency": "EUR" } ], "column`_'with\"_quotes": [ {"currency": "EUR" } ] } }}} +{"type": "RECORD", "record": {"stream": "nested_stream_with_complex_columns_resulting_into_long_names", "emitted_at": 1602638599000, "data": { "id": "4.2", "date": "2020-08-29T00:00:00Z", "partition": { "double_array_data": [[ { "id": "EUR" } ]], "DATA": [ {"currency": "EUR" } ], "column`_'with\"_quotes": [ {"currency": "EUR" } ] } }}} {"type": "RECORD", "record": {"stream": "nested_stream_with_complex_columns_resulting_into_long_names", "emitted_at": 1602638599100, "data": { "id": "test record", "date": 
"2020-08-31T00:00:00Z", "partition": { "double_array_data": [[ { "id": "USD" } ], [ { "id": "GBP" } ]], "DATA": [ {"currency": "EUR" } ], "column`_'with\"_quotes": [ {"currency": "EUR" } ] } }}} {"type": "RECORD", "record": {"stream": "nested_stream_with_complex_columns_resulting_into_long_names", "emitted_at": 1602638600000, "data": { "id": "new record", "date": "2020-09-10T00:00:00Z", "partition": { "double_array_data": [[ { "id": "GBP" } ], [ { "id": "HKD" } ]], "DATA": [ {"currency": "EUR" } ], "column`_'with\"_quotes": [ {"currency": "EUR" } ] } }}} -{"type":"RECORD","record":{"stream":"conflict_stream_name","data":{"id":1,"conflict_stream_name":{"conflict_stream_name": {"groups": "1", "custom_fields": [{"id":1, "value":3}, {"id":2, "value":4}], "conflict_stream_name": 3}}},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"conflict_stream_name","data":{"id":2,"conflict_stream_name":{"conflict_stream_name": {"groups": "2", "custom_fields": [{"id":1, "value":3}, {"id":2, "value":4}], "conflict_stream_name": 3}}},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_name","data":{"id":"1","conflict_stream_name":{"conflict_stream_name": {"groups": "1", "custom_fields": [{"id":"1", "value":3}, {"id":"2", "value":4}], "conflict_stream_name": "3"}}},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_name","data":{"id":"2","conflict_stream_name":{"conflict_stream_name": {"groups": "2", "custom_fields": [{"id":"1", "value":3}, {"id":"2", "value":4}], "conflict_stream_name": "3"}}},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":1,"conflict_stream_scalar": 2},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":2,"conflict_stream_scalar": 2},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":"1","conflict_stream_scalar": "2"},"emitted_at":1623861660}} 
+{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":"2","conflict_stream_scalar": "2"},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"conflict_stream_array","data":{"id":1, "conflict_stream_array": {"conflict_stream_array": [{"id": 1}, {"id": 2}, {"id": 3}]}}, "emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"conflict_stream_array","data":{"id":2, "conflict_stream_array": {"conflict_stream_array": [{"id": 4}, {"id": 5}, {"id": 6}]}}, "emitted_at":1623861860}} +{"type":"RECORD","record":{"stream":"conflict_stream_array","data":{"id":"1", "conflict_stream_array": {"conflict_stream_array": [{"id": "1"}, {"id": "2"}, {"id": "3"}]}}, "emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_array","data":{"id":"2", "conflict_stream_array": {"conflict_stream_array": [{"id": "4"}, {"id": "5"}, {"id": "6"}]}}, "emitted_at":1623861860}} -{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":1,"conflict_stream_scalar": 2},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":2,"conflict_stream_scalar": 2},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":"1","conflict_stream_scalar": "2"},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":"2","conflict_stream_scalar": "2"},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"unnest_alias","data":{"id":1, "children": [{"ab_id": 1, "owner": {"owner_id": 1, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}},{"ab_id": 2, "owner": {"owner_id": 2, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}}]},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"unnest_alias","data":{"id":2, "children": [{"ab_id": 3, "owner": {"owner_id": 3, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}},{"ab_id": 4, "owner": {"owner_id": 4, "column`_'with\"_quotes": [ {"currency": "EUR" 
} ]}}]},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"unnest_alias","data":{"id":"1", "children": [{"ab_id": "1", "owner": {"owner_id": "1", "column`_'with\"_quotes": [ {"currency": "EUR" } ]}},{"ab_id": "2", "owner": {"owner_id": "2", "column`_'with\"_quotes": [ {"currency": "EUR" } ]}}]},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"unnest_alias","data":{"id":"2", "children": [{"ab_id": "3", "owner": {"owner_id": "3", "column`_'with\"_quotes": [ {"currency": "EUR" } ]}},{"ab_id": "4", "owner": {"owner_id": "4", "column`_'with\"_quotes": [ {"currency": "EUR" } ]}}]},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"some_stream_that_was_empty","data":{"id":1,"date": "2020-11-05"},"emitted_at":1623871660}} -{"type":"RECORD","record":{"stream":"some_stream_that_was_empty","data":{"id":2,"date": "2020-11-06"},"emitted_at":1623872660}} -{"type":"RECORD","record":{"stream":"some_stream_that_was_empty","data":{"id":3,"date": "2020-11-06"},"emitted_at":1623873660}} +{"type":"RECORD","record":{"stream":"some_stream_that_was_empty","data":{"id":"1","date": "2020-11-05"},"emitted_at":1623871660}} +{"type":"RECORD","record":{"stream":"some_stream_that_was_empty","data":{"id":"2","date": "2020-11-06"},"emitted_at":1623872660}} +{"type":"RECORD","record":{"stream":"some_stream_that_was_empty","data":{"id":"3","date": "2020-11-06"},"emitted_at":1623873660}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog.json index 37d6c7d9a939c..db77210837336 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog.json +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog.json @@ -7,29 +7,25 @@ "type": ["null", "object"], "properties": { "id": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "date": { - "type": "string", - "format": "date" + "$ref": "WellKnownTypes.json#/definitions/Date" }, "timestamp_col": { - "type": "string", - "format": "date-time" + "$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone" }, "datetime_to_string": { - "type": "string", - "format": "date-time", - "airbyte_type": "timestamp_with_timezone" + "$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone" }, "string_to_dt": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "number_to_int": { - "type": "number" + "$ref": "WellKnownTypes.json#/definitions/Number" }, "int_to_number": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" } } }, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_incremental.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_incremental.json index 04b78b4b435f6..524761cddb67c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_incremental.json +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_incremental.json @@ -7,29 +7,25 @@ "type": ["null", "object"], "properties": { "id": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "date": { - "type": "string", - "format": "date" + "$ref": "WellKnownTypes.json#/definitions/Date" }, "timestamp_col": { - "type": "string", - "format": "date-time" + "$ref": 
"WellKnownTypes.json#/definitions/TimestampWithTimezone" }, "datetime_to_string": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "string_to_dt": { - "type": "string", - "format": "date-time", - "airbyte_type": "timestamp_with_timezone" + "$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone" }, "number_to_int": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "int_to_number": { - "type": "number" + "$ref": "WellKnownTypes.json#/definitions/Number" } } }, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_reset.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_reset.json index 9a76b76cda8b6..08b499629be28 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_reset.json +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_reset.json @@ -7,29 +7,25 @@ "type": ["null", "object"], "properties": { "id": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "date": { - "type": "string", - "format": "date" + "$ref": "WellKnownTypes.json#/definitions/Date" }, "timestamp_col": { - "type": "string", - "format": "date-time" + "$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone" }, "datetime_to_string": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "string_to_dt": { - "type": "string", - "format": "date-time", - "airbyte_type": "timestamp_with_timezone" + "$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone" }, "number_to_int": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "int_to_number": { - "type": "number" + "$ref": 
"WellKnownTypes.json#/definitions/Number" } } }, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_messages.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_messages.txt index e35685cb629a4..163e7a966826d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_messages.txt +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_messages.txt @@ -1,5 +1,5 @@ -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637589000, "data": { "id": 1, "date": "2022-08-29", "timestamp_col": "2020-08-29T00:00:00.000000-0000", "datetime_to_string":"2022-10-01T01:04:04-04:00", "string_to_dt":"2022-11-01T02:03:04-07:00", "number_to_int": 1, "int_to_number": 10}}} -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637689100, "data": { "id": 2, "date": "2022-08-30", "timestamp_col": "2020-08-30T00:00:00.000-00", "datetime_to_string":"2022-10-02T01:04:04-04:00", "string_to_dt":"2022-11-02T03:04:05-07:00", "number_to_int": 10, "int_to_number": 11}}} -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637789200, "data": { "id": 3, "date": "2022-08-31", "timestamp_col": "2020-08-31T00:00:00+00", "datetime_to_string":"2022-10-03T01:04:04-04:00", "string_to_dt":"2022-11-03T03:04:06-07:00", "number_to_int": 11, "int_to_number": 12}}} -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637889300, "data": { "id": 4, "date": "2022-09-01", "timestamp_col": "2020-08-31T00:00:00+0000", "datetime_to_string":"2022-10-04T01:04:04-04:00", "string_to_dt":"2022-11-04T03:04:07-07:00", "number_to_int": 111, "int_to_number": 133}}} -{"type": "RECORD", "record": 
{"stream": "stream_test_scd_drop", "emitted_at": 1602637989400, "data": { "id": 5, "date": "2022-09-02", "timestamp_col": "2020-09-01T00:00:00Z", "datetime_to_string":"2022-10-05T01:04:04-04:00", "string_to_dt":"2022-11-05T03:04:08-12:00", "number_to_int": 1010, "int_to_number": 1300}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637589000, "data": { "id": "1", "date": "2022-08-29", "timestamp_col": "2020-08-29T00:00:00.000000-0000", "datetime_to_string":"2022-10-01T01:04:04-04:00", "string_to_dt":"2022-11-01T02:03:04-07:00", "number_to_int": "1", "int_to_number": "10"}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637689100, "data": { "id": "2", "date": "2022-08-30", "timestamp_col": "2020-08-30T00:00:00.000-00", "datetime_to_string":"2022-10-02T01:04:04-04:00", "string_to_dt":"2022-11-02T03:04:05-07:00", "number_to_int": "10", "int_to_number": "11"}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637789200, "data": { "id": "3", "date": "2022-08-31", "timestamp_col": "2020-08-31T00:00:00+00", "datetime_to_string":"2022-10-03T01:04:04-04:00", "string_to_dt":"2022-11-03T03:04:06-07:00", "number_to_int": "11", "int_to_number": "12"}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637889300, "data": { "id": "4", "date": "2022-09-01", "timestamp_col": "2020-08-31T00:00:00+0000", "datetime_to_string":"2022-10-04T01:04:04-04:00", "string_to_dt":"2022-11-04T03:04:07-07:00", "number_to_int": "111", "int_to_number": "133"}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637989400, "data": { "id": "5", "date": "2022-09-02", "timestamp_col": "2020-09-01T00:00:00Z", "datetime_to_string":"2022-10-05T01:04:04-04:00", "string_to_dt":"2022-11-05T03:04:08-12:00", "number_to_int": "1010", "int_to_number": "1300"}}} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_scd_reset_messages_incremental.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_scd_reset_messages_incremental.txt index 492efbaea0aea..985706845fadd 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_scd_reset_messages_incremental.txt +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_scd_reset_messages_incremental.txt @@ -1,6 +1,6 @@ -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637589000, "data": { "id": 1, "date": "2022-08-29", "timestamp_col": "2020-08-29T00:00:00.000000-0000", "datetime_to_string":"2022-10-01T01:04:04-04:00", "string_to_dt":"2022-11-01T02:03:04-07:00", "number_to_int": 1, "int_to_number": 10}}} -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637689100, "data": { "id": 2, "date": "2022-08-30", "timestamp_col": "2020-08-30T00:00:00.000-00", "datetime_to_string":"2022-10-02T01:04:04-04:00", "string_to_dt":"2022-11-02T03:04:05-07:00", "number_to_int": 10, "int_to_number": 11}}} -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637789200, "data": { "id": 3, "date": "2022-08-31", "timestamp_col": "2020-08-31T00:00:00+00", "datetime_to_string":"2022-10-03T01:04:04-04:00", "string_to_dt":"2022-11-03T03:04:06-07:00", "number_to_int": 11, "int_to_number": 12}}} -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637889300, "data": { "id": 4, "date": "2022-09-01", "timestamp_col": "2020-08-31T00:00:00+0000", "datetime_to_string":"2022-10-04T01:04:04-04:00", "string_to_dt":"2022-11-04T03:04:07-07:00", "number_to_int": 111, "int_to_number": 133}}} -{"type": "RECORD", "record": {"stream": 
"stream_test_scd_drop", "emitted_at": 1602637989400, "data": { "id": 5, "date": "2022-09-02", "timestamp_col": "2020-09-01T00:00:00Z", "datetime_to_string":"2022-10-05T01:04:04-04:00", "string_to_dt":"2022-11-05T03:04:08-12:00", "number_to_int": 1010, "int_to_number": 1300}}} -{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637989400, "data": { "id": 6, "date": "2022-09-03", "timestamp_col": "2020-09-01T00:00:00Z", "datetime_to_string":"this is a string, not a datetime value", "string_to_dt":"2022-11-05T03:04:08-12:00", "number_to_int": 1010, "int_to_number": 1300.25}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637589000, "data": { "id": "1", "date": "2022-08-29", "timestamp_col": "2020-08-29T00:00:00.000000-0000", "datetime_to_string":"2022-10-01T01:04:04-04:00", "string_to_dt":"2022-11-01T02:03:04-07:00", "number_to_int": "1", "int_to_number": "10"}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637689100, "data": { "id": "2", "date": "2022-08-30", "timestamp_col": "2020-08-30T00:00:00.000-00", "datetime_to_string":"2022-10-02T01:04:04-04:00", "string_to_dt":"2022-11-02T03:04:05-07:00", "number_to_int": "10", "int_to_number": "11"}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637789200, "data": { "id": "3", "date": "2022-08-31", "timestamp_col": "2020-08-31T00:00:00+00", "datetime_to_string":"2022-10-03T01:04:04-04:00", "string_to_dt":"2022-11-03T03:04:06-07:00", "number_to_int": "11", "int_to_number": "12"}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637889300, "data": { "id": "4", "date": "2022-09-01", "timestamp_col": "2020-08-31T00:00:00+0000", "datetime_to_string":"2022-10-04T01:04:04-04:00", "string_to_dt":"2022-11-04T03:04:07-07:00", "number_to_int": "111", "int_to_number": "133"}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 
1602637989400, "data": { "id": "5", "date": "2022-09-02", "timestamp_col": "2020-09-01T00:00:00Z", "datetime_to_string":"2022-10-05T01:04:04-04:00", "string_to_dt":"2022-11-05T03:04:08-12:00", "number_to_int": "1010", "int_to_number": "1300"}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637989400, "data": { "id": "6", "date": "2022-09-03", "timestamp_col": "2020-09-01T00:00:00Z", "datetime_to_string":"this is a string, not a datetime value", "string_to_dt":"2022-11-05T03:04:08-12:00", "number_to_int": "1010", "int_to_number": "1300.25"}}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json index daef42fdf2205..4d2b83d2b8e2c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json @@ -7,53 +7,46 @@ "type": ["null", "object"], "properties": { "id": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "currency": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "date": { - "type": "string", - "format": "date" + "$ref": "WellKnownTypes.json#/definitions/Date" }, "timestamp_col": { - "type": "string", - "format": "date-time" + "$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone" }, "HKD@spéçiäl & characters": { - "type": "number" + "$ref": "WellKnownTypes.json#/definitions/Number" }, "HKD_special___characters": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "NZD": { - "type": "number" + "$ref": "WellKnownTypes.json#/definitions/Number" }, "USD": { - "type": "number" + "$ref": "WellKnownTypes.json#/definitions/Number" }, "column`_'with\"_quotes": { - 
"type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "datetime_tz": { - "type": "string", - "format": "date-time", - "airbyte_type": "timestamp_with_timezone" + "$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone" }, "datetime_no_tz": { - "type": "string", - "format": "date-time", - "airbyte_type": "timestamp_without_timezone" + "$ref": "WellKnownTypes.json#/definitions/TimestampWithoutTimezone" }, "time_tz": { - "type": "string", - "format": "time", - "airbyte_type": "time_with_timezone" + "$ref": "WellKnownTypes.json#/definitions/TimeWithTimezone" }, "time_no_tz": { - "type": "string", - "format": "time", - "airbyte_type": "time_without_timezone" + "$ref": "WellKnownTypes.json#/definitions/TimeWithoutTimezone" + }, + "property_binary_data": { + "$ref": "WellKnownTypes.json#/definitions/BinaryData" } } }, @@ -72,30 +65,28 @@ "type": ["null", "object"], "properties": { "id": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "currency": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "date": { - "type": "string", - "format": "date" + "$ref": "WellKnownTypes.json#/definitions/Date" }, "timestamp_col": { - "type": "string", - "format": "date-time" + "$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone" }, "HKD@spéçiäl & characters": { - "type": "number" + "$ref": "WellKnownTypes.json#/definitions/Number" }, "HKD_special___characters": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "NZD": { - "type": "number" + "$ref": "WellKnownTypes.json#/definitions/Number" }, "USD": { - "type": "number" + "$ref": "WellKnownTypes.json#/definitions/Number" } } }, @@ -115,10 +106,10 @@ "type": ["null", "object"], "properties": { "id": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "_ab_cdc_updated_at": { - "type": ["null", "number"] + "$ref": "WellKnownTypes.json#/definitions/Number" } } }, @@ -138,19 +129,19 @@ "type": ["null", 
"object"], "properties": { "id": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "name": { - "type": ["string", "null"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "_ab_cdc_lsn": { - "type": ["null", "number"] + "$ref": "WellKnownTypes.json#/definitions/Number" }, "_ab_cdc_updated_at": { - "type": ["null", "number"] + "$ref": "WellKnownTypes.json#/definitions/Number" }, "_ab_cdc_deleted_at": { - "type": ["null", "number"] + "$ref": "WellKnownTypes.json#/definitions/Number" } } }, @@ -170,22 +161,22 @@ "type": ["null", "object"], "properties": { "id": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "name": { - "type": ["string", "null"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "_ab_cdc_lsn": { - "type": ["null", "number"] + "$ref": "WellKnownTypes.json#/definitions/Number" }, "_ab_cdc_updated_at": { - "type": ["null", "number"] + "$ref": "WellKnownTypes.json#/definitions/Number" }, "_ab_cdc_deleted_at": { - "type": ["null", "number"] + "$ref": "WellKnownTypes.json#/definitions/Number" }, "_ab_cdc_log_pos": { - "type": ["null", "number"] + "$ref": "WellKnownTypes.json#/definitions/Number" } } }, @@ -205,14 +196,13 @@ "type": ["null", "object"], "properties": { "id": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "date": { - "type": "string", - "format": "date" + "$ref": "WellKnownTypes.json#/definitions/Date" }, "text": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" } } }, @@ -232,25 +222,25 @@ "type": ["null", "object"], "properties": { "id": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "User Id": { - "type": ["string", "null"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "user_id": { - "type": ["null", "number"] + "$ref": "WellKnownTypes.json#/definitions/Number" }, "User id": { - "type": ["null", "number"] + "$ref": "WellKnownTypes.json#/definitions/Number" }, "user id": { - "type": 
["null", "number"] + "$ref": "WellKnownTypes.json#/definitions/Number" }, "User@Id": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "UserId": { - "type": ["null", "number"] + "$ref": "WellKnownTypes.json#/definitions/Number" } } }, @@ -270,15 +260,16 @@ "type": ["null", "object"], "properties": { "id": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "airbyte_integer_column": { - "type": "number", - "airbyte_type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "nullable_airbyte_integer_column": { - "type": ["null", "number"], - "airbyte_type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" + }, + "property_binary_data": { + "$ref": "WellKnownTypes.json#/definitions/BinaryData" } } } diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog_schema_change.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog_schema_change.json index a54e89c4ff2ee..b6bcbb6c31996 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog_schema_change.json +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog_schema_change.json @@ -7,33 +7,31 @@ "type": ["null", "object"], "properties": { "id": { - "type": "number" + "$ref": "WellKnownTypes.json#/definitions/Number" }, "currency": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "new_column": { - "type": "number" + "$ref": "WellKnownTypes.json#/definitions/Number" }, "date": { - "type": "string", - "format": "date" + "$ref": "WellKnownTypes.json#/definitions/Date" }, "timestamp_col": { - "type": "string", - "format": "date-time" + "$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone" }, "HKD@spéçiäl & characters": { - "type": "number" 
+ "$ref": "WellKnownTypes.json#/definitions/Number" }, "NZD": { - "type": "number" + "$ref": "WellKnownTypes.json#/definitions/Number" }, "USD": { - "type": "number" + "$ref": "WellKnownTypes.json#/definitions/Number" }, "column`_'with\"_quotes": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" } } }, @@ -52,30 +50,28 @@ "type": ["null", "object"], "properties": { "id": { - "type": "number" + "$ref": "WellKnownTypes.json#/definitions/Number" }, "currency": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "new_column": { - "type": "number" + "$ref": "WellKnownTypes.json#/definitions/Number" }, "date": { - "type": "string", - "format": "date" + "$ref": "WellKnownTypes.json#/definitions/Date" }, "timestamp_col": { - "type": "string", - "format": "date-time" + "$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone" }, "HKD@spéçiäl & characters": { - "type": "number" + "$ref": "WellKnownTypes.json#/definitions/Number" }, "NZD": { - "type": "number" + "$ref": "WellKnownTypes.json#/definitions/Number" }, "USD": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" } } }, @@ -95,19 +91,19 @@ "type": ["null", "object"], "properties": { "id": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "name": { - "type": ["string", "null"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "_ab_cdc_lsn": { - "type": ["null", "number"] + "$ref": "WellKnownTypes.json#/definitions/Number" }, "_ab_cdc_updated_at": { - "type": ["null", "number"] + "$ref": "WellKnownTypes.json#/definitions/Number" }, "_ab_cdc_deleted_at": { - "type": ["null", "number"] + "$ref": "WellKnownTypes.json#/definitions/Number" } } }, @@ -127,19 +123,19 @@ "type": ["null", "object"], "properties": { "id": { - "type": "integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "name": { - "type": ["string", "null"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "_ab_cdc_lsn": { - "type": 
["null", "number"] + "$ref": "WellKnownTypes.json#/definitions/Number" }, "_ab_cdc_updated_at": { - "type": ["null", "number"] + "$ref": "WellKnownTypes.json#/definitions/Number" }, "_ab_cdc_deleted_at": { - "type": ["null", "number"] + "$ref": "WellKnownTypes.json#/definitions/Number" } } }, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages.txt index ed52bb6c8674b..94f7aab44927c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages.txt +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages.txt @@ -1,67 +1,67 @@ -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637589000, "data": { "id": 1, "currency": "USD", "date": "2020-08-29", "timestamp_col": "2020-08-29T00:00:00.000000-0000", "NZD": 1.14, "HKD@spéçiäl & characters": 2.13, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a" }}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637689100, "data": { "id": 1, "currency": "USD", "date": "2020-08-30", "timestamp_col": "2020-08-30T00:00:00.000-00", "NZD": 1.14, "HKD@spéçiäl & characters": 7.15, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637789200, "data": { "id": 2, "currency": "EUR", "date": "2020-08-31", "timestamp_col": "2020-08-31T00:00:00+00", "NZD": 3.89, "HKD@spéçiäl & characters": 7.12, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10.16}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637889300, "data": { "id": 2, "currency": "EUR", 
"date": "2020-08-31", "timestamp_col": "2020-08-31T00:00:00+0000", "NZD": 1.14, "HKD@spéçiäl & characters": 7.99, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10.99}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637989400, "data": { "id": 2, "currency": "EUR", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": 2.43, "HKD@spéçiäl & characters": 8, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10.16}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990700, "data": { "id": 1, "currency": "USD", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": 1.14, "HKD@spéçiäl & characters": 10.5, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990800, "data": { "id": 2, "currency": "EUR", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": 2.43, "HKD@spéçiäl & characters": 5.4, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990800, "data": { "id": 2, "currency": "EUR", "date": "", "timestamp_col": "", "NZD": 2.43, "HKD@spéçiäl & characters": 5.4, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990900, "data": { "id": 3, "currency": "GBP", "NZD": 3.14, "HKD@spéçiäl & characters": 9.2, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637991000, "data": { "id": 2, "currency": "EUR", "NZD": 3.89, "HKD@spéçiäl & characters": 7.02, "HKD_special___characters": "column name 
collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637991100, "data": { "id": 5, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 8.12, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637991200, "data": { "id": 5, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 9.23, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637991300, "data": { "id": 6, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 9.23, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "datetime_tz": "2022-01-14T01:04:04-04:00", "datetime_no_tz": "2022-01-14T01:04:04", "time_tz": "01:04:04-04:00", "time_no_tz": "01:04:04"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637589000, "data": { "id": "1", "currency": "USD", "date": "2020-08-29", "timestamp_col": "2020-08-29T00:00:00.000000-0000", "NZD": "1.14", "HKD@spéçiäl & characters": "2.13", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "property_binary_data": "dGVzdA==" }}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637689100, "data": { "id": "1", "currency": "USD", "date": "2020-08-30", "timestamp_col": "2020-08-30T00:00:00.000-00", "NZD": "1.14", "HKD@spéçiäl & characters": "7.15", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637789200, "data": { "id": "2", "currency": "EUR", "date": "2020-08-31", "timestamp_col": "2020-08-31T00:00:00+00", "NZD": "3.89", "HKD@spéçiäl & characters": "7.12", "HKD_special___characters": "column name 
collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": "10.16"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637889300, "data": { "id": "2", "currency": "EUR", "date": "2020-08-31", "timestamp_col": "2020-08-31T00:00:00+0000", "NZD": "1.14", "HKD@spéçiäl & characters": "7.99", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": "10.99"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637989400, "data": { "id": "2", "currency": "EUR", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": "2.43", "HKD@spéçiäl & characters": "8", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": "10.16"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990700, "data": { "id": "1", "currency": "USD", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": "1.14", "HKD@spéçiäl & characters": "10.5", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990800, "data": { "id": "2", "currency": "EUR", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": "2.43", "HKD@spéçiäl & characters": "5.4", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990800, "data": { "id": "2", "currency": "EUR", "date": "", "timestamp_col": "", "NZD": "2.43", "HKD@spéçiäl & characters": "5.4", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990900, "data": { "id": "3", "currency": "GBP", "NZD": "3.14", "HKD@spéçiäl & characters": "9.2", "HKD_special___characters": "column name collision?", 
"column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637991000, "data": { "id": "2", "currency": "EUR", "NZD": "3.89", "HKD@spéçiäl & characters": "7.02", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637991100, "data": { "id": "5", "currency": "USD", "NZD": "0.01", "HKD@spéçiäl & characters": "8.12", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637991200, "data": { "id": "5", "currency": "USD", "NZD": "0.01", "HKD@spéçiäl & characters": "9.23", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637991300, "data": { "id": "6", "currency": "USD", "NZD": "0.01", "HKD@spéçiäl & characters": "9.23", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "datetime_tz": "2022-01-14T01:04:04-04:00", "datetime_no_tz": "2022-01-14T01:04:04", "time_tz": "01:04:04-04:00", "time_no_tz": "01:04:04"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637589000, "data": { "id": 1, "currency": "USD", "date": "2020-08-29", "timestamp_col": "2020-08-29T00:00:00.000000-0000", "NZD": 1.14, "HKD@spéçiäl & characters": 2.13, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a" }}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637689100, "data": { "id": 1, "currency": "USD", "date": "2020-08-30", "timestamp_col": "2020-08-30T00:00:00.000-00", "NZD": 1.14, "HKD@spéçiäl & characters": 7.15, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": 
"dedup_exchange_rate", "emitted_at": 1602637789200, "data": { "id": 2, "currency": "EUR", "date": "2020-08-31", "timestamp_col": "2020-08-31T00:00:00+00", "NZD": 3.89, "HKD@spéçiäl & characters": 7.12, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10.16}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637889300, "data": { "id": 2, "currency": "EUR", "date": "2020-08-31", "timestamp_col": "2020-08-31T00:00:00+0000", "NZD": 1.14, "HKD@spéçiäl & characters": 7.99, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10.99}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637989400, "data": { "id": 2, "currency": "EUR", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": 2.43, "HKD@spéçiäl & characters": 8, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10.16}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990700, "data": { "id": 1, "currency": "USD", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": 1.14, "HKD@spéçiäl & characters": 10.5, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990800, "data": { "id": 2, "currency": "EUR", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": 2.43, "HKD@spéçiäl & characters": 5.4, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990800, "data": { "id": 2, "currency": "EUR", "date": "", "timestamp_col": "", "NZD": 2.43, "HKD@spéçiäl & characters": 5.4, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": 
"RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990900, "data": { "id": 3, "currency": "GBP", "NZD": 3.14, "HKD@spéçiäl & characters": 9.2, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637991000, "data": { "id": 2, "currency": "EUR", "NZD": 3.89, "HKD@spéçiäl & characters": 7.02, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637991100, "data": { "id": 5, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 8.12, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637991200, "data": { "id": 5, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 9.23, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637589000, "data": { "id": "1", "currency": "USD", "date": "2020-08-29", "timestamp_col": "2020-08-29T00:00:00.000000-0000", "NZD": "1.14", "HKD@spéçiäl & characters": "2.13", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a" }}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637689100, "data": { "id": "1", "currency": "USD", "date": "2020-08-30", "timestamp_col": "2020-08-30T00:00:00.000-00", "NZD": "1.14", "HKD@spéçiäl & characters": "7.15", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637789200, "data": { "id": "2", "currency": "EUR", "date": "2020-08-31", "timestamp_col": "2020-08-31T00:00:00+00", "NZD": "3.89", 
"HKD@spéçiäl & characters": "7.12", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": "10.16"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637889300, "data": { "id": "2", "currency": "EUR", "date": "2020-08-31", "timestamp_col": "2020-08-31T00:00:00+0000", "NZD": "1.14", "HKD@spéçiäl & characters": "7.99", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": "10.99"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637989400, "data": { "id": "2", "currency": "EUR", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": "2.43", "HKD@spéçiäl & characters": "8", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": "10.16"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990700, "data": { "id": "1", "currency": "USD", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": "1.14", "HKD@spéçiäl & characters": "10.5", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990800, "data": { "id": "2", "currency": "EUR", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": "2.43", "HKD@spéçiäl & characters": "5.4", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990800, "data": { "id": "2", "currency": "EUR", "date": "", "timestamp_col": "", "NZD": "2.43", "HKD@spéçiäl & characters": "5.4", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990900, "data": { "id": "3", "currency": "GBP", "NZD": "3.14", 
"HKD@spéçiäl & characters": "9.2", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637991000, "data": { "id": "2", "currency": "EUR", "NZD": "3.89", "HKD@spéçiäl & characters": "7.02", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637991100, "data": { "id": "5", "currency": "USD", "NZD": "0.01", "HKD@spéçiäl & characters": "8.12", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637991200, "data": { "id": "5", "currency": "USD", "NZD": "0.01", "HKD@spéçiäl & characters": "9.23", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} # Note that some of the IDs are inserted and then deleted; this should be reflected as a single row in the SCD model with _airbyte_active_row set to 0. 
-{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":1,"name":"mazda","_ab_cdc_updated_at":1623849130530,"_ab_cdc_lsn":26971624,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":2,"name":"toyata","_ab_cdc_updated_at":1623849130549,"_ab_cdc_lsn":26971624,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":4,"name":"bmw","_ab_cdc_updated_at":1623849314535,"_ab_cdc_lsn":26974776,"_ab_cdc_deleted_at":null},"emitted_at":1623860160}} -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":5,"name":"vw","_ab_cdc_updated_at":1623849314663,"_ab_cdc_lsn":26975264,"_ab_cdc_deleted_at":null},"emitted_at":1623860160}} -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":4,"name":null,"_ab_cdc_updated_at":1623849314791,"_ab_cdc_lsn":26975440,"_ab_cdc_deleted_at":1623849314791},"emitted_at":1623860160}} -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":6,"name":"opel","_ab_cdc_updated_at":1623850868109,"_ab_cdc_lsn":27009440,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":7,"name":"lotus","_ab_cdc_updated_at":1623850868237,"_ab_cdc_lsn":27010048,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":"1","name":"mazda","_ab_cdc_updated_at":"1623849130530","_ab_cdc_lsn":"26971624","_ab_cdc_deleted_at":null},"emitted_at":1623859926}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":"2","name":"toyata","_ab_cdc_updated_at":"1623849130549","_ab_cdc_lsn":"26971624","_ab_cdc_deleted_at":null},"emitted_at":1623859926}} 
+{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":"4","name":"bmw","_ab_cdc_updated_at":"1623849314535","_ab_cdc_lsn":"26974776","_ab_cdc_deleted_at":null},"emitted_at":1623860160}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":"5","name":"vw","_ab_cdc_updated_at":"1623849314663","_ab_cdc_lsn":"26975264","_ab_cdc_deleted_at":null},"emitted_at":1623860160}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":"4","name":null,"_ab_cdc_updated_at":"1623849314791","_ab_cdc_lsn":"26975440","_ab_cdc_deleted_at":"1623849314791"},"emitted_at":1623860160}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":"6","name":"opel","_ab_cdc_updated_at":"1623850868109","_ab_cdc_lsn":"27009440","_ab_cdc_deleted_at":null},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":"7","name":"lotus","_ab_cdc_updated_at":"1623850868237","_ab_cdc_lsn":"27010048","_ab_cdc_deleted_at":null},"emitted_at":1623861660}} # messages_incremental.txt has a dedup_cdc_excluded record with emitted_at=1623860160, i.e. older than this record. If you delete/modify this record, make sure to maintain that relationship. 
-{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":6,"name":null,"_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010232,"_ab_cdc_deleted_at":1623850868371},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":"6","name":null,"_ab_cdc_updated_at":"1623850868371","_ab_cdc_lsn":"27010232","_ab_cdc_deleted_at":"1623850868371"},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":1,"name":"mazda","_ab_cdc_updated_at":1623849130530,"_ab_cdc_lsn":26971624,"_ab_cdc_log_pos": 33274,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":2,"name":"toyata","_ab_cdc_updated_at":1623849130549,"_ab_cdc_lsn":26971624,"_ab_cdc_log_pos": 33275,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":2,"name":"bmw","_ab_cdc_updated_at":1623849314535,"_ab_cdc_lsn":26974776,"_ab_cdc_log_pos": 33278,"_ab_cdc_deleted_at":null},"emitted_at":1623860160}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":3,"name":null,"_ab_cdc_updated_at":1623849314791,"_ab_cdc_lsn":26975440,"_ab_cdc_log_pos": 33274,"_ab_cdc_deleted_at":1623849314791},"emitted_at":1623860160}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":4,"name":"lotus","_ab_cdc_updated_at":1623850868237,"_ab_cdc_lsn":27010048,"_ab_cdc_log_pos": 33271,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":4,"name":null,"_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010232,"_ab_cdc_log_pos": 33279,"_ab_cdc_deleted_at":1623850868371},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":5,"name":"lotus","_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010048,"_ab_cdc_log_pos": 33280,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} 
-{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":5,"name":"lily","_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010232,"_ab_cdc_log_pos": 33281,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":"1","name":"mazda","_ab_cdc_updated_at":"1623849130530","_ab_cdc_lsn":"26971624","_ab_cdc_log_pos": "33274","_ab_cdc_deleted_at":null},"emitted_at":1623859926}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":"2","name":"toyata","_ab_cdc_updated_at":"1623849130549","_ab_cdc_lsn":"26971624","_ab_cdc_log_pos": "33275","_ab_cdc_deleted_at":null},"emitted_at":1623859926}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":"2","name":"bmw","_ab_cdc_updated_at":"1623849314535","_ab_cdc_lsn":"26974776","_ab_cdc_log_pos": "33278","_ab_cdc_deleted_at":null},"emitted_at":1623860160}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":"3","name":null,"_ab_cdc_updated_at":"1623849314791","_ab_cdc_lsn":"26975440","_ab_cdc_log_pos": "33274","_ab_cdc_deleted_at":"1623849314791"},"emitted_at":1623860160}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":"4","name":"lotus","_ab_cdc_updated_at":"1623850868237","_ab_cdc_lsn":"27010048","_ab_cdc_log_pos": "33271","_ab_cdc_deleted_at":null},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":"4","name":null,"_ab_cdc_updated_at":"1623850868371","_ab_cdc_lsn":"27010232","_ab_cdc_log_pos": "33279","_ab_cdc_deleted_at":"1623850868371"},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":"5","name":"lotus","_ab_cdc_updated_at":"1623850868371","_ab_cdc_lsn":"27010048","_ab_cdc_log_pos": "33280","_ab_cdc_deleted_at":null},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":"5","name":"lily","_ab_cdc_updated_at":"1623850868371","_ab_cdc_lsn":"27010232","_ab_cdc_log_pos": 
"33281","_ab_cdc_deleted_at":null},"emitted_at":1623861660}} -{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637589000, "data": { "id": 1, "date": "2020-08-29", "text": "hi 1"}}} -{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637689100, "data": { "id": 1, "date": "2020-08-30", "text": "hi 2"}}} -{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637789200, "data": { "id": 2, "date": "2020-08-31", "text": "hi 1"}}} -{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637889300, "data": { "id": 2, "date": "2020-08-31", "text": "hi 2"}}} -{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637989400, "data": { "id": 2, "date": "2020-09-01", "text": "hi 3"}}} -{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637990700, "data": { "id": 1, "date": "2020-09-01", "text": "hi 3"}}} -{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637990800, "data": { "id": 2, "date": "2020-09-01", "text": "hi 4"}}} +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637589000, "data": { "id": "1", "date": "2020-08-29", "text": "hi 1"}}} +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637689100, "data": { "id": "1", "date": "2020-08-30", "text": "hi 2"}}} +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637789200, "data": { "id": "2", "date": "2020-08-31", "text": "hi 1"}}} +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637889300, "data": { "id": "2", "date": "2020-08-31", "text": "hi 2"}}} +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637989400, "data": { "id": "2", "date": "2020-09-01", "text": "hi 3"}}} +{"type": "RECORD", "record": 
{"stream": "1_prefix_startwith_number", "emitted_at": 1602637990700, "data": { "id": "1", "date": "2020-09-01", "text": "hi 3"}}} +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637990800, "data": { "id": "2", "date": "2020-09-01", "text": "hi 4"}}} -{"type":"RECORD","record":{"stream":"multiple_column_names_conflicts","data":{"id":1,"User Id":"chris","user_id":42,"User id":300,"user id": 102,"UserId":101},"emitted_at":1623959926}} +{"type":"RECORD","record":{"stream":"multiple_column_names_conflicts","data":{"id":"1","User Id":"chris","user_id":"42","User id":"300","user id": "102","UserId": "101"},"emitted_at":1623959926}} # These records are verified in types_testing_incorrect_values.sql. If you add/remove entries, make sure to update that file as well. # IMPORTANT: big_integer_column and nullable_big_integer_column were removed from catalog.json because of difficulties in implementing NUMERIC support. # This is fine, because no major sources currently produce big_integer fields. # After that functionality is completed, we should restore their entries to catalog.json. # Verify max value for int64, and a 28-digit value for big_integer. 
(28 is larger than an int64 can handle, but still within bounds for a BigQuery NUMERIC column) -{"type":"RECORD","record":{"stream":"types_testing","data":{"id":1,"airbyte_integer_column":9223372036854775807,"nullable_airbyte_integer_column":9223372036854775807,"big_integer_column":"1234567890123456789012345678","nullable_big_integer_column":"1234567890123456789012345678"},"emitted_at":1623959926}} +{"type":"RECORD","record":{"stream":"types_testing","data":{"id":"1","airbyte_integer_column":"9223372036854775807","nullable_airbyte_integer_column":"9223372036854775807","big_integer_column":"1234567890123456789012345678","nullable_big_integer_column":"1234567890123456789012345678", "property_binary_data": "dGVzdA=="},"emitted_at":1623959926}} # Verify max value for int64, and a negative 28-digit value for big_integer -{"type":"RECORD","record":{"stream":"types_testing","data":{"id":2,"airbyte_integer_column":-9223372036854775808,"nullable_airbyte_integer_column":-9223372036854775808,"big_integer_column":"-1234567890123456789012345678","nullable_big_integer_column":"-1234567890123456789012345678"},"emitted_at":1623959926}} +{"type":"RECORD","record":{"stream":"types_testing","data":{"id":"2","airbyte_integer_column":"-9223372036854775808","nullable_airbyte_integer_column":"-9223372036854775808","big_integer_column":"-1234567890123456789012345678","nullable_big_integer_column":"-1234567890123456789012345678"},"emitted_at":1623959926}} # Verify nullable values -{"type":"RECORD","record":{"stream":"types_testing","data":{"id":3,"airbyte_integer_column":0,"big_integer_column":0},"emitted_at":1623959926}} +{"type":"RECORD","record":{"stream":"types_testing","data":{"id":"3","airbyte_integer_column":"0","big_integer_column":"0", "property_binary_data": null},"emitted_at":1623959926}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_incremental.txt 
b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_incremental.txt index 98c8ae988e782..b7e0fcfeee427 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_incremental.txt +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_incremental.txt @@ -4,35 +4,35 @@ # (I think?) This mimics an interruption to normalization, such that some records were normalized but others were not. # These first records are old data. -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990800, "data": { "id": 2, "currency": "EUR", "date": "", "timestamp_col": "", "NZD": 2.43, "HKD@spéçiäl & characters": 5.4, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990900, "data": { "id": 3, "currency": "GBP", "NZD": 3.14, "HKD@spéçiäl & characters": 9.2, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990800, "data": { "id": "2", "currency": "EUR", "date": "", "timestamp_col": "", "NZD": "2.43", "HKD@spéçiäl & characters": "5.4", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990900, "data": { "id": "3", "currency": "GBP", "NZD": "3.14", "HKD@spéçiäl & characters": "9.2", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} # These records are new data. 
-{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602650000000, "data": { "id": 2, "currency": "EUR", "NZD": 3.89, "HKD@spéçiäl & characters": 14.05, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602650010000, "data": { "id": 4, "currency": "HKD", "NZD": 1.19, "HKD@spéçiäl & characters": 0.01, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602650011000, "data": { "id": 1, "currency": "USD", "date": "2020-10-14", "timestamp_col": "2020-10-14T00:00:00.000-00", "NZD": 1.14, "HKD@spéçiäl & characters": 9.5, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602650012000, "data": { "id": 5, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 6.39, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602650000000, "data": { "id": "2", "currency": "EUR", "NZD": "3.89", "HKD@spéçiäl & characters": "14.05", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602650010000, "data": { "id": "4", "currency": "HKD", "NZD": "1.19", "HKD@spéçiäl & characters": "0.01", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602650011000, "data": { "id": "1", "currency": "USD", "date": "2020-10-14", "timestamp_col": "2020-10-14T00:00:00.000-00", "NZD": "1.14", "HKD@spéçiäl & characters": "9.5", "HKD_special___characters": "column name collision?", 
"column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602650012000, "data": { "id": "5", "currency": "USD", "NZD": "0.01", "HKD@spéçiäl & characters": "6.39", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} # These first records are old data. -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990800, "data": { "id": 2, "currency": "EUR", "date": "", "timestamp_col": "", "NZD": 2.43, "HKD@spéçiäl & characters": 5.4, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990900, "data": { "id": 3, "currency": "GBP", "NZD": 3.14, "HKD@spéçiäl & characters": 9.2, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990800, "data": { "id": "2", "currency": "EUR", "date": "", "timestamp_col": "", "NZD": "2.43", "HKD@spéçiäl & characters": "5.4", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990900, "data": { "id": "3", "currency": "GBP", "NZD": "3.14", "HKD@spéçiäl & characters": "9.2", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} # These records are new data. 
-{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602650000000, "data": { "id": 2, "currency": "EUR", "NZD": 3.89, "HKD@spéçiäl & characters": 14.05, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602650010000, "data": { "id": 4, "currency": "HKD", "NZD": 1.19, "HKD@spéçiäl & characters": 0.01, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602650011000, "data": { "id": 1, "currency": "USD", "date": "2020-10-14", "timestamp_col": "2020-10-14T00:00:00.000-00", "NZD": 1.14, "HKD@spéçiäl & characters": 9.5, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602650012000, "data": { "id": 5, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 6.39, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602650000000, "data": { "id": "2", "currency": "EUR", "NZD": "3.89", "HKD@spéçiäl & characters": "14.05", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602650010000, "data": { "id": "4", "currency": "HKD", "NZD": "1.19", "HKD@spéçiäl & characters": "0.01", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602650011000, "data": { "id": "1", "currency": "USD", "date": "2020-10-14", "timestamp_col": "2020-10-14T00:00:00.000-00", "NZD": "1.14", "HKD@spéçiäl & characters": "9.5", "HKD_special___characters": 
"column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602650012000, "data": { "id": "5", "currency": "USD", "NZD": "0.01", "HKD@spéçiäl & characters": "6.39", "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} # All of these records are new data. # This record has an _older_ emitted_at than the latest dedup_cdc_excluded record in messages.txt -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":5,"name":"vw","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623849314663,"_ab_cdc_lsn":26975264,"_ab_cdc_deleted_at":null},"emitted_at":1623860160}} -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":5,"name":null,"column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623900000000,"_ab_cdc_lsn":28010252,"_ab_cdc_deleted_at":1623900000000},"emitted_at":1623900000000}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":"5","name":"vw","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":"1623849314663","_ab_cdc_lsn":"26975264","_ab_cdc_deleted_at":null},"emitted_at":1623860160}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":"5","name":null,"column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":"1623900000000","_ab_cdc_lsn":"28010252","_ab_cdc_deleted_at":"1623900000000"},"emitted_at":1623900000000}} # Previously we had a bug where we only respected deletions from the most recent _airbyte_emitted_at. This message tests that ID 5 is still correctly deleted (i.e. marked with _airbyte_active_row = 0). # This record is also deleted in messages_schema_change.txt. 
-{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":8,"name":"ford","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1624000000000,"_ab_cdc_lsn":29010252,"_ab_cdc_deleted_at":null},"emitted_at":1624000000000}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":"8","name":"ford","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":"1624000000000","_ab_cdc_lsn":"29010252","_ab_cdc_deleted_at":null},"emitted_at":1624000000000}} # All of these records are old data. -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":1,"name":"mazda","_ab_cdc_updated_at":1623849130530,"_ab_cdc_lsn":26971624,"_ab_cdc_log_pos": 33274,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":2,"name":"toyata","_ab_cdc_updated_at":1623849130549,"_ab_cdc_lsn":26971624,"_ab_cdc_log_pos": 33275,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":2,"name":"bmw","_ab_cdc_updated_at":1623849314535,"_ab_cdc_lsn":26974776,"_ab_cdc_log_pos": 33278,"_ab_cdc_deleted_at":null},"emitted_at":1623860160}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":3,"name":null,"_ab_cdc_updated_at":1623849314791,"_ab_cdc_lsn":26975440,"_ab_cdc_log_pos": 33274,"_ab_cdc_deleted_at":1623849314791},"emitted_at":1623860160}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":4,"name":"lotus","_ab_cdc_updated_at":1623850868237,"_ab_cdc_lsn":27010048,"_ab_cdc_log_pos": 33271,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} -{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":4,"name":null,"_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010232,"_ab_cdc_log_pos": 33279,"_ab_cdc_deleted_at":1623850868371},"emitted_at":1623861660}} 
+{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":"1","name":"mazda","_ab_cdc_updated_at":"1623849130530","_ab_cdc_lsn":"26971624","_ab_cdc_log_pos": "33274","_ab_cdc_deleted_at":null},"emitted_at":1623859926}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":"2","name":"toyata","_ab_cdc_updated_at":"1623849130549","_ab_cdc_lsn":"26971624","_ab_cdc_log_pos": "33275","_ab_cdc_deleted_at":null},"emitted_at":1623859926}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":"2","name":"bmw","_ab_cdc_updated_at":"1623849314535","_ab_cdc_lsn":"26974776","_ab_cdc_log_pos": "33278","_ab_cdc_deleted_at":null},"emitted_at":1623860160}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":"3","name":null,"_ab_cdc_updated_at":"1623849314791","_ab_cdc_lsn":"26975440","_ab_cdc_log_pos": "33274","_ab_cdc_deleted_at":"1623849314791"},"emitted_at":1623860160}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":"4","name":"lotus","_ab_cdc_updated_at":"1623850868237","_ab_cdc_lsn":"27010048","_ab_cdc_log_pos": "33271","_ab_cdc_deleted_at":null},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":"4","name":null,"_ab_cdc_updated_at":"1623850868371","_ab_cdc_lsn":"27010232","_ab_cdc_log_pos": "33279","_ab_cdc_deleted_at":"1623850868371"},"emitted_at":1623861660}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_schema_change.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_schema_change.txt index 7190fe88bc353..0925429d1ddec 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_schema_change.txt +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_schema_change.txt @@ -1,16 +1,16 @@ 
-{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602661281900, "data": { "id": 3.14, "currency": "EUR", "new_column": 2.1, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 2.43, "HKD@spéçiäl & characters": 2.12, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 7}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602661291900, "data": { "id": 0.12, "currency": "GBP", "new_column": 3.81, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 3.14, "HKD@spéçiäl & characters": 3.01, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 11}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602661381900, "data": { "id": 4.22, "currency": "EUR", "new_column": 89.1, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 3.89, "HKD@spéçiäl & characters": 8.88, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10}}} -{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602661481900, "data": { "id": 1, "currency": "HKD", "new_column": 91.11, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 1.19, "HKD@spéçiäl & characters": 99.1, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602661281900, "data": { "id": "3.14", "currency": "EUR", "new_column": "2.1", "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": "2.43", "HKD@spéçiäl & characters": "2.12", "column`_'with\"_quotes":"ma\"z`d'a", "USD": "7"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602661291900, "data": { "id": "0.12", "currency": "GBP", "new_column": "3.81", "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": "3.14", "HKD@spéçiäl & characters": "3.01", "column`_'with\"_quotes":"ma\"z`d'a", "USD": "11"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602661381900, "data": { "id": "4.22", "currency": "EUR", 
"new_column": "89.1", "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": "3.89", "HKD@spéçiäl & characters": "8.88", "column`_'with\"_quotes":"ma\"z`d'a", "USD": "10"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602661481900, "data": { "id": "1", "currency": "HKD", "new_column": "91.11", "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": "1.19", "HKD@spéçiäl & characters": "99.1", "column`_'with\"_quotes":"ma\"z`d'a", "USD": "10"}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602661281900, "data": { "id": 3.14, "currency": "EUR", "new_column": 2.1, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 2.43, "HKD@spéçiäl & characters": 2.12, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 7}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602661291900, "data": { "id": 0.12, "currency": "GBP", "new_column": 3.81, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 3.14, "HKD@spéçiäl & characters": 3.01, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 11}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602661381900, "data": { "id": 4.22, "currency": "EUR", "new_column": 89.1, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 3.89, "HKD@spéçiäl & characters": 8.88, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10}}} -{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602661481900, "data": { "id": 1, "currency": "HKD", "new_column": 91.11, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 1.19, "HKD@spéçiäl & characters": 99.1, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602661281900, "data": { "id": "3.14", "currency": "EUR", "new_column": "2.1", "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": "2.43", 
"HKD@spéçiäl & characters": "2.12", "column`_'with\"_quotes":"ma\"z`d'a", "USD": "7"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602661291900, "data": { "id": "0.12", "currency": "GBP", "new_column": "3.81", "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": "3.14", "HKD@spéçiäl & characters": "3.01", "column`_'with\"_quotes":"ma\"z`d'a", "USD": "11"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602661381900, "data": { "id": "4.22", "currency": "EUR", "new_column": "89.1", "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": "3.89", "HKD@spéçiäl & characters": "8.88", "column`_'with\"_quotes":"ma\"z`d'a", "USD": "10"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602661481900, "data": { "id": "1", "currency": "HKD", "new_column": "91.11", "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": "1.19", "HKD@spéçiäl & characters": "99.1", "column`_'with\"_quotes":"ma\"z`d'a", "USD": "10"}}} -{"type":"RECORD","record":{"stream":"renamed_dedup_cdc_excluded","data":{"id":8,"name":"vw","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623949314663,"_ab_cdc_lsn":26985264,"_ab_cdc_deleted_at":null},"emitted_at":1623960160}} -{"type":"RECORD","record":{"stream":"renamed_dedup_cdc_excluded","data":{"id":9,"name":"opel","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623950868109,"_ab_cdc_lsn":28009440,"_ab_cdc_deleted_at":null},"emitted_at":1623961660}} -{"type":"RECORD","record":{"stream":"renamed_dedup_cdc_excluded","data":{"id":9,"name":null,"column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623950868371,"_ab_cdc_lsn":28010232,"_ab_cdc_deleted_at":1623950868371},"emitted_at":1623961660}} 
+{"type":"RECORD","record":{"stream":"renamed_dedup_cdc_excluded","data":{"id":"8","name":"vw","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":"1623949314663","_ab_cdc_lsn":"26985264","_ab_cdc_deleted_at":null},"emitted_at":1623960160}} +{"type":"RECORD","record":{"stream":"renamed_dedup_cdc_excluded","data":{"id":"9","name":"opel","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":"1623950868109","_ab_cdc_lsn":"28009440","_ab_cdc_deleted_at":null},"emitted_at":1623961660}} +{"type":"RECORD","record":{"stream":"renamed_dedup_cdc_excluded","data":{"id":"9","name":null,"column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":"1623950868371","_ab_cdc_lsn":"28010232","_ab_cdc_deleted_at":"1623950868371"},"emitted_at":1623961660}} # This message tests the ability to delete a record which was inserted in a previous sync. See messages_incremental.txt for how it was inserted. -{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":8,"name":"ford","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1625000000000,"_ab_cdc_lsn":29020252,"_ab_cdc_deleted_at":1625000000000},"emitted_at":1625000000000}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":"8","name":"ford","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":"1625000000000","_ab_cdc_lsn":"29020252","_ab_cdc_deleted_at":"1625000000000"},"emitted_at":1625000000000}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_bigquery_tmp/types_testing_binary_values.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_bigquery_tmp/types_testing_binary_values.sql new file mode 100644 index 0000000000000..6b3d183e4961e --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_bigquery_tmp/types_testing_binary_values.sql @@ -0,0 +1,14 @@ +select * from {{ ref('types_testing') }} where +( + id = 1 and ( + cast(property_binary_data as {{ dbt_utils.type_string() }}) != 'test' + ) +) or ( + id = 2 and ( + cast(property_binary_data as {{ type_binary() }}) is not null + ) +) or ( + id = 3 and ( + cast(property_binary_data as {{ type_binary() }}) is not null + ) +) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_clickhouse_tmp/types_testing_binary_values.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_clickhouse_tmp/types_testing_binary_values.sql new file mode 100644 index 0000000000000..d4596982af56b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_clickhouse_tmp/types_testing_binary_values.sql @@ -0,0 +1,14 @@ +select * from {{ ref('types_testing') }} where +( + id = 1 and ( + cast(property_binary_data as {{ type_binary() }}) != 'test' + ) +) or ( + id = 2 and ( + cast(property_binary_data as {{ type_binary() }}) is not null + ) +) or ( + id = 3 and ( + cast(property_binary_data as {{ type_binary() }}) is not null + ) +) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_mssql_tmp/types_testing_binary_values.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_mssql_tmp/types_testing_binary_values.sql new file mode 100644 index 
0000000000000..d4596982af56b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_mssql_tmp/types_testing_binary_values.sql @@ -0,0 +1,14 @@ +select * from {{ ref('types_testing') }} where +( + id = 1 and ( + cast(property_binary_data as {{ type_binary() }}) != 'test' + ) +) or ( + id = 2 and ( + cast(property_binary_data as {{ type_binary() }}) is not null + ) +) or ( + id = 3 and ( + cast(property_binary_data as {{ type_binary() }}) is not null + ) +) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_mysql_tidb_tmp/types_testing_binary_values.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_mysql_tidb_tmp/types_testing_binary_values.sql new file mode 100644 index 0000000000000..d4596982af56b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_mysql_tidb_tmp/types_testing_binary_values.sql @@ -0,0 +1,14 @@ +select * from {{ ref('types_testing') }} where +( + id = 1 and ( + cast(property_binary_data as {{ type_binary() }}) != 'test' + ) +) or ( + id = 2 and ( + cast(property_binary_data as {{ type_binary() }}) is not null + ) +) or ( + id = 3 and ( + cast(property_binary_data as {{ type_binary() }}) is not null + ) +) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_oracle_tmp/types_testing_binary_values.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_oracle_tmp/types_testing_binary_values.sql new file mode 
100644 index 0000000000000..b14ebf3c81375 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_oracle_tmp/types_testing_binary_values.sql @@ -0,0 +1,14 @@ +select * from {{ ref('types_testing') }} where +( + id = 1 and ( + cast(property_binary_data as {{ dbt_utils.type_string() }}) != 'dGVzdA==' + ) +) or ( + id = 2 and ( + cast(property_binary_data as {{ dbt_utils.type_string() }}) is not null + ) +) or ( + id = 3 and ( + cast(property_binary_data as {{ dbt_utils.type_string() }}) is not null + ) +) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_postgres_tmp/types_testing_binary_values.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_postgres_tmp/types_testing_binary_values.sql new file mode 100644 index 0000000000000..d4596982af56b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_postgres_tmp/types_testing_binary_values.sql @@ -0,0 +1,14 @@ +select * from {{ ref('types_testing') }} where +( + id = 1 and ( + cast(property_binary_data as {{ type_binary() }}) != 'test' + ) +) or ( + id = 2 and ( + cast(property_binary_data as {{ type_binary() }}) is not null + ) +) or ( + id = 3 and ( + cast(property_binary_data as {{ type_binary() }}) is not null + ) +) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_redshift_tmp/types_testing_binary_values.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_redshift_tmp/types_testing_binary_values.sql new file mode 100644 index 0000000000000..b14ebf3c81375 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_redshift_tmp/types_testing_binary_values.sql @@ -0,0 +1,14 @@ +select * from {{ ref('types_testing') }} where +( + id = 1 and ( + cast(property_binary_data as {{ dbt_utils.type_string() }}) != 'dGVzdA==' + ) +) or ( + id = 2 and ( + cast(property_binary_data as {{ dbt_utils.type_string() }}) is not null + ) +) or ( + id = 3 and ( + cast(property_binary_data as {{ dbt_utils.type_string() }}) is not null + ) +) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_snowflake_tmp/types_testing_binary_values.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_snowflake_tmp/types_testing_binary_values.sql new file mode 100644 index 0000000000000..a199f033f3c2a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_test_binary_tmp/dbt_data_test_snowflake_tmp/types_testing_binary_values.sql @@ -0,0 +1,14 @@ +select * from {{ ref('types_testing') }} where +( + id = 1 and ( + cast(property_binary_data as {{ dbt_utils.type_string() }}) != 'test' + ) +) or ( + id = 2 and ( + cast(property_binary_data as {{ type_binary() }}) is not null + ) +) or ( + id = 3 and ( + cast(property_binary_data as {{ type_binary() }}) is not null + ) +) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/test_ephemeral.py 
b/airbyte-integrations/bases/base-normalization/integration_tests/test_ephemeral.py index f459f5faecd64..279e9529fa680 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/test_ephemeral.py +++ b/airbyte-integrations/bases/base-normalization/integration_tests/test_ephemeral.py @@ -188,10 +188,10 @@ def generate_dbt_models(destination_type: DestinationType, test_root_dir: str, c ] } if column_count == 1: - catalog_config["streams"][0]["stream"]["json_schema"]["properties"]["_airbyte_id"] = {"type": "integer"} + catalog_config["streams"][0]["stream"]["json_schema"]["properties"]["_airbyte_id"] = {"$ref" : "WellKnownTypes.json#/definitions/Integer"} else: for column in [dbt_test_utils.random_string(5) for _ in range(column_count)]: - catalog_config["streams"][0]["stream"]["json_schema"]["properties"][column] = {"type": "string"} + catalog_config["streams"][0]["stream"]["json_schema"]["properties"][column] = {"$ref": "WellKnownTypes.json#/definitions/String"} catalog = os.path.join(test_root_dir, "catalog.json") with open(catalog, "w") as fh: fh.write(json.dumps(catalog_config)) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py b/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py index 0163cd1281510..55f9ca1904a4d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py +++ b/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py @@ -76,8 +76,8 @@ def test_normalization(destination_type: DestinationType, test_resource_name: st if destination_type.value not in dbt_test_utils.get_test_targets(): pytest.skip(f"Destinations {destination_type} is not in NORMALIZATION_TEST_TARGET env variable") if ( - destination_type.value in (DestinationType.ORACLE.value, DestinationType.CLICKHOUSE.value) - and test_resource_name == "test_nested_streams" + destination_type.value in (DestinationType.ORACLE.value, 
DestinationType.CLICKHOUSE.value) + and test_resource_name == "test_nested_streams" ): pytest.skip(f"Destinations {destination_type} does not support nested streams") @@ -113,6 +113,7 @@ def run_first_normalization(destination_type: DestinationType, test_resource_nam generate_dbt_models(destination_type, test_resource_name, test_root_dir, "models", "catalog.json", dbt_test_utils) # Setup test resources and models setup_dbt_test(destination_type, test_resource_name, test_root_dir) + setup_dbt_binary_test(destination_type, test_resource_name, test_root_dir) dbt_test_utils.dbt_check(destination_type, test_root_dir) # Run dbt process dbt_test_utils.dbt_run(destination_type, test_root_dir, force_full_refresh=True) @@ -218,7 +219,7 @@ def setup_test_dir(destination_type: DestinationType, test_resource_name: str) - def setup_input_raw_data( - destination_type: DestinationType, test_resource_name: str, test_root_dir: str, destination_config: Dict[str, Any] + destination_type: DestinationType, test_resource_name: str, test_root_dir: str, destination_config: Dict[str, Any] ) -> bool: """ We run docker images of destinations to upload test data stored in the messages.txt file for each test case. @@ -301,6 +302,77 @@ def setup_dbt_test(destination_type: DestinationType, test_resource_name: str, t ) +def setup_dbt_binary_test(destination_type: DestinationType, test_resource_name: str, test_root_dir: str): + """ + Prepare the data (copy) for the models for dbt test. 
+ """ + replace_identifiers = os.path.join("resources", test_resource_name, "data_input", "replace_identifiers.json") + + if DestinationType.BIGQUERY == destination_type: + copy_test_files( + os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_test_binary_tmp/dbt_data_test_bigquery_tmp"), + os.path.join(test_root_dir, "models/dbt_data_tests"), + destination_type, + replace_identifiers, + ) + + if DestinationType.SNOWFLAKE == destination_type: + copy_test_files( + os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_test_binary_tmp/dbt_data_test_snowflake_tmp"), + os.path.join(test_root_dir, "models/dbt_data_tests"), + destination_type, + replace_identifiers, + ) + + if DestinationType.CLICKHOUSE == destination_type: + copy_test_files( + os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_test_binary_tmp/dbt_data_test_clickhouse_tmp"), + os.path.join(test_root_dir, "models/dbt_data_tests"), + destination_type, + replace_identifiers, + ) + + if DestinationType.MYSQL == destination_type or DestinationType.TIDB == destination_type: + copy_test_files( + os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_test_binary_tmp/dbt_data_test_mysql_tidb_tmp"), + os.path.join(test_root_dir, "models/dbt_data_tests"), + destination_type, + replace_identifiers, + ) + + if DestinationType.MSSQL == destination_type: + copy_test_files( + os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_test_binary_tmp/dbt_data_test_mssql_tmp"), + os.path.join(test_root_dir, "models/dbt_data_tests"), + destination_type, + replace_identifiers, + ) + + if DestinationType.POSTGRES == destination_type: + copy_test_files( + os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_test_binary_tmp/dbt_data_test_postgres_tmp"), + os.path.join(test_root_dir, "models/dbt_data_tests"), + destination_type, + replace_identifiers, + ) + + if DestinationType.ORACLE == destination_type: 
+ copy_test_files( + os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_test_binary_tmp/dbt_data_test_oracle_tmp"), + os.path.join(test_root_dir, "models/dbt_data_tests"), + destination_type, + replace_identifiers, + ) + + if DestinationType.REDSHIFT == destination_type: + copy_test_files( + os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_test_binary_tmp/dbt_data_test_redshift_tmp"), + os.path.join(test_root_dir, "models/dbt_data_tests"), + destination_type, + replace_identifiers, + ) + + def setup_dbt_incremental_test(destination_type: DestinationType, test_resource_name: str, test_root_dir: str): """ Prepare the data (copy) for the models for dbt test. @@ -411,7 +483,6 @@ def copy_test_files(src: str, dst: str, destination_type: DestinationType, repla pattern.append(k.replace("\\", r"\\")) replace_value.append(entry[k]) if pattern and replace_value: - def copy_replace_identifiers(src, dst): dbt_test_utils.copy_replace(src, dst, pattern, replace_value) diff --git a/airbyte-integrations/bases/base-normalization/normalization/data_type.py b/airbyte-integrations/bases/base-normalization/normalization/data_type.py new file mode 100644 index 0000000000000..ccdcd4f9d0371 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/normalization/data_type.py @@ -0,0 +1,20 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+# + +WELL_KNOWN_TYPE_VAR_NAME = "WellKnownTypes.json" + +REF_TYPE_VAR_NAME = "$ref" +TYPE_VAR_NAME = "type" +ONE_OF_VAR_NAME = "oneOf" + +STRING_TYPE = "WellKnownTypes.json#/definitions/String" +BINARY_DATA_TYPE = "WellKnownTypes.json#/definitions/BinaryData" +BOOLEAN_TYPE = "WellKnownTypes.json#/definitions/Boolean" +INTEGER_TYPE = "WellKnownTypes.json#/definitions/Integer" +NUMBER_TYPE = "WellKnownTypes.json#/definitions/Number" +DATE_TYPE = "WellKnownTypes.json#/definitions/Date" +TIMESTAMP_WITHOUT_TIMEZONE_TYPE = "WellKnownTypes.json#/definitions/TimestampWithoutTimezone" +TIMESTAMP_WITH_TIMEZONE_TYPE = "WellKnownTypes.json#/definitions/TimestampWithTimezone" +TIME_WITH_TIME_ZONE_TYPE = "WellKnownTypes.json#/definitions/TimeWithTimezone" +TIME_WITHOUT_TIME_ZONE_TYPE = "WellKnownTypes.json#/definitions/TimeWithoutTimezone" diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 231588f92903e..4443d14af843b 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -2,7 +2,6 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# - import os import re from enum import Enum @@ -10,6 +9,7 @@ from airbyte_cdk.models.airbyte_protocol import DestinationSyncMode, SyncMode from jinja2 import Template +from normalization import data_type from normalization.destination_type import DestinationType from normalization.transform_catalog import dbt_macro from normalization.transform_catalog.destination_name_transformer import DestinationNameTransformer, transform_json_naming @@ -18,6 +18,7 @@ is_airbyte_column, is_array, is_big_integer, + is_binary_datatype, is_boolean, is_combining_node, is_date, @@ -360,12 +361,20 @@ def find_children_streams(self, from_table: str, column_names: Dict[str, Tuple[s elif is_combining_node(properties[field]): # TODO: merge properties of all combinations pass - elif "type" not in properties[field] or is_object(properties[field]["type"]): + elif ( + data_type.TYPE_VAR_NAME not in properties[field] + and data_type.REF_TYPE_VAR_NAME not in properties[field] + and data_type.ONE_OF_VAR_NAME not in properties[field] + ) or (data_type.TYPE_VAR_NAME in properties[field] and is_object(properties[field][data_type.TYPE_VAR_NAME])): # properties without 'type' field are treated like properties with 'type' = 'object' children_properties = find_properties_object([], field, properties[field]) is_nested_array = False json_column_name = column_names[field][1] - elif is_array(properties[field]["type"]) and "items" in properties[field]: + elif ( + data_type.TYPE_VAR_NAME in properties[field] + and is_array(properties[field][data_type.TYPE_VAR_NAME]) + and "items" in properties[field] + ): quoted_field = column_names[field][1] children_properties = find_properties_object([], field, properties[field]["items"]) is_nested_array = True @@ -456,13 +465,15 @@ def extract_json_column(property_name: str, json_column_name: str, definition: D table_alias = "" json_extract = jinja_call(f"json_extract('{table_alias}', {json_column_name}, {json_path})") - if "type" in definition: - if 
is_array(definition["type"]): + if data_type.REF_TYPE_VAR_NAME in definition or data_type.TYPE_VAR_NAME in definition or data_type.ONE_OF_VAR_NAME in definition: + if data_type.TYPE_VAR_NAME in definition and is_array(definition[data_type.TYPE_VAR_NAME]): json_extract = jinja_call(f"json_extract_array({json_column_name}, {json_path}, {normalized_json_path})") - if is_simple_property(definition.get("items", {"type": "object"})): + if is_simple_property(definition.get("items", {data_type.TYPE_VAR_NAME: "object"})): json_extract = jinja_call(f"json_extract_string_array({json_column_name}, {json_path}, {normalized_json_path})") - elif is_object(definition["type"]): + elif data_type.TYPE_VAR_NAME in definition and is_object(definition[data_type.TYPE_VAR_NAME]): json_extract = jinja_call(f"json_extract('{table_alias}', {json_column_name}, {json_path}, {normalized_json_path})") + elif data_type.REF_TYPE_VAR_NAME in definition and (is_date(definition) or is_time(definition) or is_datetime(definition)): + json_extract = jinja_call(f"json_extract_scalar({json_column_name}, {json_path}, {normalized_json_path})") elif is_simple_property(definition): json_extract = jinja_call(f"json_extract_scalar({json_column_name}, {json_path}, {normalized_json_path})") @@ -504,22 +515,44 @@ def cast_property_types(self, column_names: Dict[str, Tuple[str, str]]) -> List[ def cast_property_type(self, property_name: str, column_name: str, jinja_column: str) -> Any: # noqa: C901 definition = self.properties[property_name] - if "type" not in definition: + if ( + data_type.TYPE_VAR_NAME not in definition + and data_type.REF_TYPE_VAR_NAME not in definition + and data_type.ONE_OF_VAR_NAME not in definition + ): print(f"WARN: Unknown type for column {property_name} at {self.current_json_path()}") return column_name - elif is_array(definition["type"]): + elif data_type.TYPE_VAR_NAME in definition and is_array(definition[data_type.TYPE_VAR_NAME]): return column_name - elif 
is_object(definition["type"]): + elif data_type.TYPE_VAR_NAME in definition and is_object(definition[data_type.TYPE_VAR_NAME]): sql_type = jinja_call("type_json()") - # Treat simple types from narrower to wider scope type: boolean < integer < number < string - elif is_boolean(definition["type"], definition): + # Treat simple types from wider scope TO narrower type: string > boolean > integer > number + elif (data_type.REF_TYPE_VAR_NAME in definition and is_string(definition)) or ( + data_type.ONE_OF_VAR_NAME in definition and is_string(definition) + ): + sql_type = jinja_call("dbt_utils.type_string()") + if self.destination_type == DestinationType.CLICKHOUSE: + trimmed_column_name = f"trim(BOTH '\"' from {column_name})" + sql_type = f"'{sql_type}'" + return f"nullif(accurateCastOrNull({trimmed_column_name}, {sql_type}), 'null') as {column_name}" + elif self.destination_type == DestinationType.MYSQL: + # Cast to `text` datatype. See https://github.com/airbytehq/airbyte/issues/7994 + sql_type = f"{sql_type}(1024)" + + elif (data_type.REF_TYPE_VAR_NAME in definition and is_boolean(definition)) or ( + data_type.ONE_OF_VAR_NAME in definition and is_boolean(definition) + ): cast_operation = jinja_call(f"cast_to_boolean({jinja_column})") return f"{cast_operation} as {column_name}" elif is_big_integer(definition): sql_type = jinja_call("type_very_large_integer()") - elif is_long(definition["type"], definition): + elif (data_type.REF_TYPE_VAR_NAME in definition and is_long(definition)) or ( + data_type.ONE_OF_VAR_NAME in definition and is_long(definition) + ): sql_type = jinja_call("dbt_utils.type_bigint()") - elif is_number(definition["type"]): + elif (data_type.REF_TYPE_VAR_NAME in definition and is_number(definition)) or ( + data_type.ONE_OF_VAR_NAME in definition and is_number(definition) + ): sql_type = jinja_call("dbt_utils.type_float()") elif is_datetime(definition): if self.destination_type == DestinationType.SNOWFLAKE: @@ -574,21 +607,59 @@ def 
cast_property_type(self, property_name: str, column_name: str, jinja_column: return f'nullif(cast({column_name} as {sql_type}), "") as {column_name}' replace_operation = jinja_call(f"empty_string_to_null({jinja_column})") return f"cast({replace_operation} as {sql_type}) as {column_name}" - elif is_string(definition["type"]): - sql_type = jinja_call("dbt_utils.type_string()") - if self.destination_type == DestinationType.CLICKHOUSE: + elif (data_type.REF_TYPE_VAR_NAME in definition and is_binary_datatype(definition)) or ( + data_type.ONE_OF_VAR_NAME in definition and is_binary_datatype(definition) + ): + if self.destination_type.value == DestinationType.POSTGRES.value: + # sql_type = "bytea" + sql_type = jinja_call("type_binary()") + return f"cast(decode({column_name}, 'base64') as {sql_type}) as {column_name}" + elif self.destination_type.value == DestinationType.BIGQUERY.value: + # sql_type = "bytes" + sql_type = jinja_call("type_binary()") + return f"cast(FROM_BASE64({column_name}) as {sql_type}) as {column_name}" + elif self.destination_type.value == DestinationType.MYSQL.value or self.destination_type.value == DestinationType.TIDB.value: + # sql_type = "BINARY" + sql_type = jinja_call("type_binary()") + return f"cast(FROM_BASE64({column_name}) as {sql_type}) as {column_name}" + elif self.destination_type.value == DestinationType.MSSQL.value: + # sql_type = "VARBINARY(MAX)" + sql_type = jinja_call("type_binary()") + return f"CAST({column_name} as XML ).value('.','{sql_type}') as {column_name}" + elif self.destination_type.value == DestinationType.SNOWFLAKE.value: + # sql_type = "VARBINARY" + sql_type = jinja_call("type_binary()") + return f"cast(BASE64_DECODE_BINARY({column_name}) as {sql_type}) as {column_name}" + elif self.destination_type.value == DestinationType.CLICKHOUSE.value: + # sql_type = "VARBINARY" + sql_type = jinja_call("type_binary()") trimmed_column_name = f"trim(BOTH '\"' from {column_name})" - sql_type = f"'{sql_type}'" - return 
f"nullif(accurateCastOrNull({trimmed_column_name}, {sql_type}), 'null') as {column_name}" - elif self.destination_type == DestinationType.MYSQL: - # Cast to `text` datatype. See https://github.com/airbytehq/airbyte/issues/7994 - sql_type = f"{sql_type}(1024)" + return f"cast(FROM_BASE64({trimmed_column_name}) as {sql_type}) as {column_name}" + else: + sql_type = jinja_call("dbt_utils.type_string()") + else: - print(f"WARN: Unknown type {definition['type']} for column {property_name} at {self.current_json_path()}") + if data_type.REF_TYPE_VAR_NAME in definition: + print( + f"WARN: Unknown ref type {definition[data_type.REF_TYPE_VAR_NAME]} for column {property_name} at {self.current_json_path()}" + ) + elif data_type.ONE_OF_VAR_NAME in definition: + print( + f"WARN: Unknown oneOf simple type {definition[data_type.ONE_OF_VAR_NAME]} for column {property_name} at {self.current_json_path()}" + ) + else: + print(f"WARN: Unknown type {definition[data_type.TYPE_VAR_NAME]} for column {property_name} at {self.current_json_path()}") return column_name if self.destination_type == DestinationType.CLICKHOUSE: - return f"accurateCastOrNull({column_name}, '{sql_type}') as {column_name}" + if data_type.REF_TYPE_VAR_NAME in definition and ( + data_type.NUMBER_TYPE in definition[data_type.REF_TYPE_VAR_NAME] + or data_type.INTEGER_TYPE in definition[data_type.REF_TYPE_VAR_NAME] + ): + trimmed_column_name = f"trim(BOTH '\"' from {column_name})" + return f"accurateCastOrNull({trimmed_column_name}, '{sql_type}') as {column_name}" + else: + return f"accurateCastOrNull({column_name}, '{sql_type}') as {column_name}" else: return f"cast({column_name} as {sql_type}) as {column_name}" @@ -714,13 +785,17 @@ def safe_cast_to_string(definition: Dict, column_name: str, destination_type: De the curly brackets. 
""" - if "type" not in definition: + if ( + data_type.TYPE_VAR_NAME not in definition + and data_type.REF_TYPE_VAR_NAME not in definition + and data_type.ONE_OF_VAR_NAME not in definition + ): col = column_name - elif is_boolean(definition["type"], definition): + elif data_type.REF_TYPE_VAR_NAME in definition and is_boolean(definition): col = f"boolean_to_string({column_name})" - elif is_array(definition["type"]): + elif data_type.TYPE_VAR_NAME in definition and is_array(definition[data_type.TYPE_VAR_NAME]): col = f"array_to_string({column_name})" - elif is_object(definition["type"]): + elif data_type.TYPE_VAR_NAME in definition and is_object(definition[data_type.TYPE_VAR_NAME]): col = f"object_to_string({column_name})" else: col = column_name @@ -799,7 +874,7 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup if ( self.destination_type == DestinationType.BIGQUERY and self.get_cursor_field_property_name(column_names) != self.airbyte_emitted_at - and is_number(self.properties[self.get_cursor_field_property_name(column_names)]["type"]) + and is_number(self.properties[self.get_cursor_field_property_name(column_names)]) ): # partition by float columns is not allowed in BigQuery, cast it to string airbyte_start_at_string = ( @@ -1047,11 +1122,14 @@ def get_primary_key_from_path(self, column_names: Dict[str, Tuple[str, str]], pa if path and len(path) == 1: field = path[0] if not is_airbyte_column(field): - if "type" in self.properties[field]: - property_type = self.properties[field]["type"] + if data_type.REF_TYPE_VAR_NAME in self.properties[field] or data_type.ONE_OF_VAR_NAME in self.properties[field]: + if data_type.ONE_OF_VAR_NAME in self.properties[field]: + property_type = data_type.ONE_OF_VAR_NAME + else: + property_type = data_type.REF_TYPE_VAR_NAME else: property_type = "object" - if is_number(property_type) or is_object(property_type): + if is_number(self.properties[field]) or is_object(property_type): # some destinations don't 
handle float columns (or complex types) as primary keys, turn them to string return f"cast({column_names[field][0]} as {jinja_call('dbt_utils.type_string()')})" else: @@ -1493,7 +1571,7 @@ def find_properties_object(path: List[str], field: str, properties) -> Dict[str, elif "properties" in properties: # we found a properties object return {current: properties["properties"]} - elif "type" in properties and is_simple_property(properties): + elif data_type.REF_TYPE_VAR_NAME in properties and is_simple_property(properties): # we found a basic type return {current: {}} elif isinstance(properties, dict): diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/utils.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/utils.py index 862ae722d9edf..10d6ee35af145 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/utils.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/utils.py @@ -2,9 +2,9 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# +from typing import Callable, Set, Union -from typing import Set, Union - +from normalization import data_type from normalization.transform_catalog import dbt_macro @@ -16,76 +16,77 @@ def remove_jinja(command: str) -> str: return str(command).replace("{{ ", "").replace(" }}", "") -def is_string(property_type) -> bool: - return property_type == "string" or "string" in property_type +def is_type_included(definition: dict, is_type: Callable[[dict], bool]) -> bool: + if data_type.ONE_OF_VAR_NAME in definition: + return bool(any(is_type(option) for option in definition[data_type.ONE_OF_VAR_NAME])) + else: + return is_type(definition) + + +def get_reftype_function(type: str) -> Callable[[dict], bool]: + def is_reftype(definition: dict) -> bool: + return data_type.REF_TYPE_VAR_NAME in definition and type == definition[data_type.REF_TYPE_VAR_NAME] + + return is_reftype + + +def is_string(definition: dict) -> bool: + return is_type_included(definition, get_reftype_function(data_type.STRING_TYPE)) + + +def is_binary_datatype(definition: dict) -> bool: + return is_type_included(definition, get_reftype_function(data_type.BINARY_DATA_TYPE)) def is_datetime(definition: dict) -> bool: - return ( - is_string(definition["type"]) - and ("format" in definition.keys()) - and (definition["format"] == "date-time" or "date-time" in definition["format"]) - ) + return is_datetime_with_timezone(definition) or is_datetime_without_timezone(definition) def is_datetime_without_timezone(definition: dict) -> bool: - return is_datetime(definition) and definition.get("airbyte_type") == "timestamp_without_timezone" + return is_type_included(definition, get_reftype_function(data_type.TIMESTAMP_WITHOUT_TIMEZONE_TYPE)) def is_datetime_with_timezone(definition: dict) -> bool: - return is_datetime(definition) and (not definition.get("airbyte_type") or definition.get("airbyte_type") == "timestamp_with_timezone") + return is_type_included(definition, 
get_reftype_function(data_type.TIMESTAMP_WITH_TIMEZONE_TYPE)) def is_date(definition: dict) -> bool: - return ( - is_string(definition["type"]) - and ("format" in definition.keys()) - and (definition["format"] == "date" or "date" in definition["format"]) - ) + return is_type_included(definition, get_reftype_function(data_type.DATE_TYPE)) def is_time(definition: dict) -> bool: - return is_string(definition["type"]) and definition.get("format") == "time" + return is_time_with_timezone(definition) or is_time_without_timezone(definition) def is_time_with_timezone(definition: dict) -> bool: - return is_time(definition) and definition.get("airbyte_type") == "time_with_timezone" + return is_type_included(definition, get_reftype_function(data_type.TIME_WITH_TIME_ZONE_TYPE)) def is_time_without_timezone(definition: dict) -> bool: - return is_time(definition) and definition.get("airbyte_type") == "time_without_timezone" + return is_type_included(definition, get_reftype_function(data_type.TIME_WITHOUT_TIME_ZONE_TYPE)) -def is_number(property_type) -> bool: - if is_string(property_type): +def is_number(definition: dict) -> bool: + if is_string(definition): # Handle union type, give priority to wider scope types return False - return property_type == "number" or "number" in property_type + return is_type_included(definition, get_reftype_function(data_type.NUMBER_TYPE)) +# this is obsolete type that will not be used in new datatypes def is_big_integer(definition: dict) -> bool: - return "airbyte_type" in definition and definition["airbyte_type"] == "big_integer" + return False -def is_long(property_type, definition: dict) -> bool: - # Check specifically for {type: number, airbyte_type: integer} - if ( - (property_type == "number" or "number" in property_type) - and "airbyte_type" in definition - and definition["airbyte_type"] == "integer" - ): - return True - if is_string(property_type) or is_number(property_type): - # Handle union type, give priority to wider scope types - 
return False - return property_type == "integer" or "integer" in property_type +def is_long(definition: dict) -> bool: + return is_type_included(definition, get_reftype_function(data_type.INTEGER_TYPE)) -def is_boolean(property_type, definition: dict) -> bool: - if is_string(property_type) or is_number(property_type) or is_big_integer(definition) or is_long(property_type, definition): +def is_boolean(definition: dict) -> bool: + if is_string(definition) or is_number(definition) or is_big_integer(definition) or is_long(definition): # Handle union type, give priority to wider scope types return False - return property_type == "boolean" or "boolean" in property_type + return is_type_included(definition, get_reftype_function(data_type.BOOLEAN_TYPE)) def is_array(property_type) -> bool: @@ -101,18 +102,24 @@ def is_airbyte_column(name: str) -> bool: def is_simple_property(definition: dict) -> bool: - if "type" not in definition: - property_type = "object" - else: - property_type = definition["type"] return ( - is_string(property_type) + is_string(definition) or is_big_integer(definition) - or is_long(property_type, definition) - or is_number(property_type) - or is_boolean(property_type, definition) + or is_long(definition) + or is_number(definition) + or is_boolean(definition) + or is_date(definition) + or is_time(definition) + or is_datetime(definition) + or is_binary_datatype(definition) ) def is_combining_node(properties: dict) -> Set[str]: - return set(properties).intersection({"anyOf", "oneOf", "allOf"}) + # this case appears when we have analog of old protocol like id: {type:[number, string]} and it's handled separately + if data_type.ONE_OF_VAR_NAME in properties and any( + data_type.WELL_KNOWN_TYPE_VAR_NAME in option[data_type.REF_TYPE_VAR_NAME] for option in properties[data_type.ONE_OF_VAR_NAME] + ): + return set() + else: + return set(properties).intersection({"anyOf", "oneOf", "allOf"}) diff --git 
a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog.json index 7ffa2f36d4421..8b1bb673bd45a 100644 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog.json +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog.json @@ -7,7 +7,7 @@ "type": ["null", "object"], "properties": { "id": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" } } }, @@ -27,7 +27,7 @@ "type": ["null", "object"], "properties": { "id": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" } } }, @@ -46,7 +46,7 @@ "type": ["null", "object"], "properties": { "id": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" } } }, diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog.json index 94e6b4a798d9a..882af94c7d22d 100644 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog.json +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog.json @@ -7,19 +7,19 @@ "type": ["null", "object"], "properties": { "id": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "body": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "name": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "title": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "status": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "adlabels": { "type": ["null", "array"], @@ -27,24 +27,22 @@ "type": "object", 
"properties": { "id": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "name": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "created_time": { - "type": "string", - "format": "date-time" + "$ref": "WellKnownTypes.json#/definitions/TimestampWithoutTimezone" }, "updated_time": { - "type": "string", - "format": "date-time" + "$ref": "WellKnownTypes.json#/definitions/TimestampWithoutTimezone" } } } }, "link_url": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "image_crops": { "type": ["null", "object"], @@ -54,7 +52,7 @@ "items": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "$ref": "WellKnownTypes.json#/definitions/Integer" } } }, @@ -63,7 +61,7 @@ "items": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "$ref": "WellKnownTypes.json#/definitions/Integer" } } }, @@ -72,7 +70,7 @@ "items": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "$ref": "WellKnownTypes.json#/definitions/Integer" } } }, @@ -81,7 +79,7 @@ "items": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "$ref": "WellKnownTypes.json#/definitions/Integer" } } }, @@ -90,7 +88,7 @@ "items": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "$ref": "WellKnownTypes.json#/definitions/Integer" } } }, @@ -99,7 +97,7 @@ "items": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "$ref": "WellKnownTypes.json#/definitions/Integer" } } }, @@ -108,7 +106,7 @@ "items": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "$ref": "WellKnownTypes.json#/definitions/Integer" } } } @@ -118,22 +116,22 @@ "type": ["null", "object"], "properties": { "page_id": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "link_data": { "type": ["null", "object"], "properties": { "link": { - "type": ["null", "string"] + "$ref": 
"WellKnownTypes.json#/definitions/String" }, "name": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "caption": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "message": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "image_crops": { "type": ["null", "object"], @@ -143,7 +141,7 @@ "items": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "$ref": "WellKnownTypes.json#/definitions/Integer" } } }, @@ -152,7 +150,7 @@ "items": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "$ref": "WellKnownTypes.json#/definitions/Integer" } } }, @@ -161,7 +159,7 @@ "items": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "$ref": "WellKnownTypes.json#/definitions/Integer" } } }, @@ -170,7 +168,7 @@ "items": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "$ref": "WellKnownTypes.json#/definitions/Integer" } } }, @@ -179,7 +177,7 @@ "items": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "$ref": "WellKnownTypes.json#/definitions/Integer" } } }, @@ -188,7 +186,7 @@ "items": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "$ref": "WellKnownTypes.json#/definitions/Integer" } } }, @@ -197,7 +195,7 @@ "items": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "$ref": "WellKnownTypes.json#/definitions/Integer" } } } @@ -212,13 +210,13 @@ "type": "object", "properties": { "url": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "app_name": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "app_store_id": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" } } } @@ -229,13 +227,13 @@ "type": "object", "properties": { "url": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "app_name": { - "type": "string" + "$ref": 
"WellKnownTypes.json#/definitions/String" }, "app_store_id": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" } } } @@ -246,13 +244,13 @@ "type": "object", "properties": { "url": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "app_name": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "app_store_id": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" } } } @@ -263,16 +261,16 @@ "type": "object", "properties": { "url": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "class": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "package": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "app_name": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" } } } @@ -284,7 +282,7 @@ "type": ["null", "object"], "properties": { "message": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" } } }, @@ -292,27 +290,27 @@ "type": ["null", "object"], "properties": { "url": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "caption": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "image_hash": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "page_welcome_message": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "branded_content_sponsor_page_id": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "branded_content_sponsor_relationship": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" } } }, "instagram_actor_id": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" } } }, @@ -323,13 +321,13 @@ "type": ["null", "object"], "properties": { "url": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "app_name": 
{ - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "app_store_id": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" } } }, @@ -337,10 +335,10 @@ "type": ["null", "object"], "properties": { "url": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "should_fallback": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" } } }, @@ -348,13 +346,13 @@ "type": ["null", "object"], "properties": { "url": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "app_name": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "app_store_id": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" } } }, @@ -362,7 +360,7 @@ "type": ["null", "object"], "properties": { "app_id": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" } } }, @@ -370,13 +368,13 @@ "type": ["null", "object"], "properties": { "url": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "app_name": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "app_store_id": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" } } }, @@ -384,13 +382,13 @@ "type": ["null", "object"], "properties": { "url": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "package": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "app_name": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" } } }, @@ -398,13 +396,13 @@ "type": ["null", "object"], "properties": { "url": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "app_id": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" }, "app_name": { - "type": "string" + "$ref": "WellKnownTypes.json#/definitions/String" } } } diff --git 
a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog.json index 336cf17d71941..3197a208302c7 100644 --- a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog.json +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog.json @@ -8,7 +8,7 @@ "type": ["null", "object"], "properties": { "id": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" } } }, @@ -28,13 +28,13 @@ "type": ["null", "object"], "properties": { "id": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" }, "stream_name": { "type": ["null", "object"], "properties": { "id": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" } } } @@ -56,7 +56,7 @@ "type": ["null", "object"], "properties": { "id": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" } } }, @@ -76,7 +76,7 @@ "type": ["null", "object"], "properties": { "id": { - "type": ["null", "string"] + "$ref": "WellKnownTypes.json#/definitions/String" } } }, diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/test_stream_processor.py b/airbyte-integrations/bases/base-normalization/unit_tests/test_stream_processor.py index cfc48bacbef1f..232fe5652b63c 100644 --- a/airbyte-integrations/bases/base-normalization/unit_tests/test_stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/unit_tests/test_stream_processor.py @@ -64,13 +64,13 @@ def test_cursor_field(cursor_field: List[str], expecting_exception: bool, expect @pytest.mark.parametrize( "primary_key, column_type, expecting_exception, expected_primary_keys, expected_final_primary_key_string", [ - ([["id"]], "string", False, ["id"], "{{ adapter.quote('id') }}"), - ([["id"]], "number", False, ["id"], "cast({{ 
adapter.quote('id') }} as {{ dbt_utils.type_string() }})"), - ([["first_name"], ["last_name"]], "string", False, ["first_name", "last_name"], "first_name, last_name"), - ([["float_id"]], "number", False, ["float_id"], "cast(float_id as {{ dbt_utils.type_string() }})"), - ([["_airbyte_emitted_at"]], "string", False, [], "cast(_airbyte_emitted_at as {{ dbt_utils.type_string() }})"), - (None, "string", True, [], ""), - ([["parent", "nested_field"]], "string", True, [], ""), + ([["id"]], "WellKnownTypes.json#/definitions/String", False, ["id"], "{{ adapter.quote('id') }}"), + ([["id"]], "WellKnownTypes.json#/definitions/Number", False, ["id"], "cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }})"), + ([["first_name"], ["last_name"]], "WellKnownTypes.json#/definitions/String", False, ["first_name", "last_name"], "first_name, last_name"), + ([["float_id"]], "WellKnownTypes.json#/definitions/Number", False, ["float_id"], "cast(float_id as {{ dbt_utils.type_string() }})"), + ([["_airbyte_emitted_at"]], "WellKnownTypes.json#/definitions/String", False, [], "cast(_airbyte_emitted_at as {{ dbt_utils.type_string() }})"), + (None, "WellKnownTypes.json#/definitions/String", True, [], ""), + ([["parent", "nested_field"]], "WellKnownTypes.json#/definitions/String", True, [], ""), ], ) def test_primary_key( @@ -91,7 +91,7 @@ def test_primary_key( cursor_field=[], primary_key=primary_key, json_column_name="json_column_name", - properties={key: {"type": column_type} for key in expected_primary_keys}, + properties={key: {"$ref": column_type} for key in expected_primary_keys}, tables_registry=TableNameRegistry(DestinationType.POSTGRES), from_table="", ) From ee150e33e97057335fb8cf03272c3a02e5ed73c9 Mon Sep 17 00:00:00 2001 From: Jimmy Ma Date: Tue, 10 Jan 2023 10:58:44 -0800 Subject: [PATCH 06/11] Update airbyte protocol migration (#20745) * Extract MigrationContainer from AirbyteMessageMigrator * Add ConfiguredAirbyteCatalogMigrations * Add ConfiguredAirbyteCatalog to 
AirbyteMessageMigrations * Enable ConfiguredAirbyteCatalog migration * Fix tests * Remove extra this. * Add missing docs * Typo Co-authored-by: Edward Gao --- .../protocol/AirbyteMessageMigrator.java | 96 +++++------------ .../AirbyteMessageVersionedMigrator.java | 10 +- ...irbyteMessageVersionedMigratorFactory.java | 30 ------ ...rbyteProtocolVersionedMigratorFactory.java | 37 +++++++ .../ConfiguredAirbyteCatalogMigrator.java | 68 ++++++++++++ .../protocol/DefaultProtocolSerializer.java | 17 +++ .../commons/protocol/ProtocolSerializer.java | 13 +++ .../protocol/VersionedProtocolSerializer.java | 32 ++++++ .../migrations/AirbyteMessageMigration.java | 21 ++-- .../migrations/AirbyteMessageMigrationV1.java | 8 +- .../ConfiguredAirbyteCatalogMigration.java | 25 +++++ .../ConfiguredAirbyteCatalogMigrationV1.java | 37 +++++++ .../protocol/migrations/Migration.java | 21 ++++ .../migrations/MigrationContainer.java | 101 ++++++++++++++++++ .../AirbyteMessageMigratorMicronautTest.java | 28 ----- .../protocol/AirbyteMessageMigratorTest.java | 37 ++++--- .../protocol/MigratorsMicronautTest.java | 37 +++++++ .../internal/DefaultAirbyteDestination.java | 12 ++- .../internal/DefaultAirbyteSource.java | 16 ++- ...VersionedAirbyteMessageBufferedWriter.java | 9 +- ...edAirbyteMessageBufferedWriterFactory.java | 16 ++- .../VersionedAirbyteStreamFactory.java | 21 ++-- .../DefaultAirbyteDestinationTest.java | 9 +- .../internal/DefaultAirbyteSourceTest.java | 14 +-- .../VersionedAirbyteStreamFactoryTest.java | 36 ++++--- .../config/ContainerOrchestratorFactory.java | 4 +- .../ReplicationJobOrchestrator.java | 25 +++-- .../ContainerOrchestratorFactoryTest.java | 14 +-- .../CheckConnectionActivityImpl.java | 9 +- .../catalog/DiscoverCatalogActivityImpl.java | 9 +- .../temporal/spec/SpecActivityImpl.java | 9 +- .../sync/ReplicationActivityImpl.java | 15 ++- 32 files changed, 593 insertions(+), 243 deletions(-) delete mode 100644 
airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteMessageVersionedMigratorFactory.java create mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteProtocolVersionedMigratorFactory.java create mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/ConfiguredAirbyteCatalogMigrator.java create mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/DefaultProtocolSerializer.java create mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/ProtocolSerializer.java create mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/VersionedProtocolSerializer.java create mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/ConfiguredAirbyteCatalogMigration.java create mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/ConfiguredAirbyteCatalogMigrationV1.java create mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/Migration.java create mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/MigrationContainer.java delete mode 100644 airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/AirbyteMessageMigratorMicronautTest.java create mode 100644 airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/MigratorsMicronautTest.java diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteMessageMigrator.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteMessageMigrator.java index 1906e143c33a1..c171a559b822a 100644 --- a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteMessageMigrator.java +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteMessageMigrator.java @@ -6,15 +6,14 @@ import com.google.common.annotations.VisibleForTesting; import 
io.airbyte.commons.protocol.migrations.AirbyteMessageMigration; +import io.airbyte.commons.protocol.migrations.MigrationContainer; import io.airbyte.commons.version.Version; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import jakarta.annotation.PostConstruct; import jakarta.inject.Singleton; -import java.util.Collection; -import java.util.Collections; import java.util.List; +import java.util.Optional; import java.util.Set; -import java.util.SortedMap; -import java.util.TreeMap; /** * AirbyteProtocol Message Migrator @@ -25,104 +24,59 @@ @Singleton public class AirbyteMessageMigrator { - private final List> migrationsToRegister; - private final SortedMap> migrations = new TreeMap<>(); - private String mostRecentMajorVersion = ""; + private final MigrationContainer> migrationContainer; - public AirbyteMessageMigrator(List> migrations) { - migrationsToRegister = migrations; - } - - public AirbyteMessageMigrator() { - this(Collections.emptyList()); + public AirbyteMessageMigrator(final List> migrations) { + migrationContainer = new MigrationContainer<>(migrations); } @PostConstruct public void initialize() { - migrationsToRegister.forEach(this::registerMigration); + migrationContainer.initialize(); } /** * Downgrade a message from the most recent version to the target version by chaining all the * required migrations */ - public PreviousVersion downgrade(final CurrentVersion message, final Version target) { - if (target.getMajorVersion().equals(mostRecentMajorVersion)) { - return (PreviousVersion) message; - } - - Object result = message; - Object[] selectedMigrations = selectMigrations(target).toArray(); - for (int i = selectedMigrations.length; i > 0; --i) { - result = applyDowngrade((AirbyteMessageMigration) selectedMigrations[i - 1], result); - } - return (PreviousVersion) result; + public PreviousVersion downgrade(final CurrentVersion message, + final Version target, + final Optional configuredAirbyteCatalog) { + return 
migrationContainer.downgrade(message, target, (migration, msg) -> applyDowngrade(migration, msg, configuredAirbyteCatalog)); } /** * Upgrade a message from the source version to the most recent version by chaining all the required * migrations */ - public CurrentVersion upgrade(final PreviousVersion message, final Version source) { - if (source.getMajorVersion().equals(mostRecentMajorVersion)) { - return (CurrentVersion) message; - } - - Object result = message; - for (var migration : selectMigrations(source)) { - result = applyUpgrade(migration, result); - } - return (CurrentVersion) result; + public CurrentVersion upgrade(final PreviousVersion message, + final Version source, + final Optional configuredAirbyteCatalog) { + return migrationContainer.upgrade(message, source, (migration, msg) -> applyUpgrade(migration, msg, configuredAirbyteCatalog)); } public Version getMostRecentVersion() { - return new Version(mostRecentMajorVersion, "0", "0"); - } - - private Collection> selectMigrations(final Version version) { - final Collection> results = migrations.tailMap(version.getMajorVersion()).values(); - if (results.isEmpty()) { - throw new RuntimeException("Unsupported migration version " + version.serialize()); - } - return results; + return migrationContainer.getMostRecentVersion(); } // Helper function to work around type casting - private PreviousVersion applyDowngrade(final AirbyteMessageMigration migration, - final Object message) { - return migration.downgrade((CurrentVersion) message); + private static PreviousVersion applyDowngrade(final AirbyteMessageMigration migration, + final Object message, + final Optional configuredAirbyteCatalog) { + return migration.downgrade((CurrentVersion) message, configuredAirbyteCatalog); } // Helper function to work around type casting - private CurrentVersion applyUpgrade(final AirbyteMessageMigration migration, - final Object message) { - return migration.upgrade((PreviousVersion) message); - } - - /** - * Store migration in 
a sorted map key by the major of the lower version of the migration. - * - * The goal is to be able to retrieve the list of migrations to apply to get to/from a given - * version. We are only keying on the lower version because the right side (most recent version of - * the migration range) is always current version. - */ - @VisibleForTesting - void registerMigration(final AirbyteMessageMigration migration) { - final String key = migration.getPreviousVersion().getMajorVersion(); - if (!migrations.containsKey(key)) { - migrations.put(key, migration); - if (migration.getCurrentVersion().getMajorVersion().compareTo(mostRecentMajorVersion) > 0) { - mostRecentMajorVersion = migration.getCurrentVersion().getMajorVersion(); - } - } else { - throw new RuntimeException("Trying to register a duplicated migration " + migration.getClass().getName()); - } + private static CurrentVersion applyUpgrade(final AirbyteMessageMigration migration, + final Object message, + final Optional configuredAirbyteCatalog) { + return migration.upgrade((PreviousVersion) message, configuredAirbyteCatalog); } // Used for inspection of the injection @VisibleForTesting Set getMigrationKeys() { - return migrations.keySet(); + return migrationContainer.getMigrationKeys(); } } diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteMessageVersionedMigrator.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteMessageVersionedMigrator.java index c421777c03eb2..a0474d7663c7f 100644 --- a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteMessageVersionedMigrator.java +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteMessageVersionedMigrator.java @@ -6,6 +6,8 @@ import io.airbyte.commons.version.Version; import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.util.Optional; /** * Wraps message migration from a fixed version to the 
most recent version @@ -20,12 +22,12 @@ public AirbyteMessageVersionedMigrator(final AirbyteMessageMigrator migrator, fi this.version = version; } - public OriginalMessageType downgrade(final AirbyteMessage message) { - return migrator.downgrade(message, version); + public OriginalMessageType downgrade(final AirbyteMessage message, final Optional configuredAirbyteCatalog) { + return migrator.downgrade(message, version, configuredAirbyteCatalog); } - public AirbyteMessage upgrade(final OriginalMessageType message) { - return migrator.upgrade(message, version); + public AirbyteMessage upgrade(final OriginalMessageType message, final Optional configuredAirbyteCatalog) { + return migrator.upgrade(message, version, configuredAirbyteCatalog); } public Version getVersion() { diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteMessageVersionedMigratorFactory.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteMessageVersionedMigratorFactory.java deleted file mode 100644 index dec45297880e4..0000000000000 --- a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteMessageVersionedMigratorFactory.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.commons.protocol; - -import io.airbyte.commons.version.Version; -import jakarta.inject.Singleton; - -/** - * Factory to build AirbyteMessageVersionedMigrator - */ -@Singleton -public class AirbyteMessageVersionedMigratorFactory { - - private final AirbyteMessageMigrator migrator; - - public AirbyteMessageVersionedMigratorFactory(final AirbyteMessageMigrator migrator) { - this.migrator = migrator; - } - - public AirbyteMessageVersionedMigrator getVersionedMigrator(final Version version) { - return new AirbyteMessageVersionedMigrator<>(this.migrator, version); - } - - public Version getMostRecentVersion() { - return migrator.getMostRecentVersion(); - } - -} diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteProtocolVersionedMigratorFactory.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteProtocolVersionedMigratorFactory.java new file mode 100644 index 0000000000000..52af4e2233e22 --- /dev/null +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/AirbyteProtocolVersionedMigratorFactory.java @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.commons.protocol; + +import io.airbyte.commons.version.Version; +import jakarta.inject.Singleton; + +/** + * Factory to build AirbyteMessageVersionedMigrator + */ +@Singleton +public class AirbyteProtocolVersionedMigratorFactory { + + private final AirbyteMessageMigrator airbyteMessageMigrator; + private final ConfiguredAirbyteCatalogMigrator configuredAirbyteCatalogMigrator; + + public AirbyteProtocolVersionedMigratorFactory(final AirbyteMessageMigrator airbyteMessageMigrator, + final ConfiguredAirbyteCatalogMigrator configuredAirbyteCatalogMigrator) { + this.airbyteMessageMigrator = airbyteMessageMigrator; + this.configuredAirbyteCatalogMigrator = configuredAirbyteCatalogMigrator; + } + + public AirbyteMessageVersionedMigrator getAirbyteMessageMigrator(final Version version) { + return new AirbyteMessageVersionedMigrator<>(airbyteMessageMigrator, version); + } + + public final VersionedProtocolSerializer getProtocolSerializer(final Version version) { + return new VersionedProtocolSerializer(configuredAirbyteCatalogMigrator, version); + } + + public Version getMostRecentVersion() { + return airbyteMessageMigrator.getMostRecentVersion(); + } + +} diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/ConfiguredAirbyteCatalogMigrator.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/ConfiguredAirbyteCatalogMigrator.java new file mode 100644 index 0000000000000..7c2c2111445d9 --- /dev/null +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/ConfiguredAirbyteCatalogMigrator.java @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.commons.protocol; + +import com.google.common.annotations.VisibleForTesting; +import io.airbyte.commons.protocol.migrations.ConfiguredAirbyteCatalogMigration; +import io.airbyte.commons.protocol.migrations.MigrationContainer; +import io.airbyte.commons.version.Version; +import jakarta.annotation.PostConstruct; +import jakarta.inject.Singleton; +import java.util.List; +import java.util.Set; + +@Singleton +public class ConfiguredAirbyteCatalogMigrator { + + private final MigrationContainer> migrationContainer; + + public ConfiguredAirbyteCatalogMigrator(final List> migrations) { + migrationContainer = new MigrationContainer<>(migrations); + } + + @PostConstruct + public void initialize() { + migrationContainer.initialize(); + } + + /** + * Downgrade a message from the most recent version to the target version by chaining all the + * required migrations + */ + public PreviousVersion downgrade(final CurrentVersion message, final Version target) { + return migrationContainer.downgrade(message, target, ConfiguredAirbyteCatalogMigrator::applyDowngrade); + } + + /** + * Upgrade a message from the source version to the most recent version by chaining all the required + * migrations + */ + public CurrentVersion upgrade(final PreviousVersion message, final Version source) { + return migrationContainer.upgrade(message, source, ConfiguredAirbyteCatalogMigrator::applyUpgrade); + } + + public Version getMostRecentVersion() { + return migrationContainer.getMostRecentVersion(); + } + + // Helper function to work around type casting + private static PreviousVersion applyDowngrade(final ConfiguredAirbyteCatalogMigration migration, + final Object message) { + return migration.downgrade((CurrentVersion) message); + } + + // Helper function to work around type casting + private static CurrentVersion applyUpgrade(final ConfiguredAirbyteCatalogMigration migration, + final Object message) { + return migration.upgrade((PreviousVersion) message); + } + + // Used 
for inspection of the injection + @VisibleForTesting + Set getMigrationKeys() { + return migrationContainer.getMigrationKeys(); + } + +} diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/DefaultProtocolSerializer.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/DefaultProtocolSerializer.java new file mode 100644 index 0000000000000..0e6ee93f5bbdd --- /dev/null +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/DefaultProtocolSerializer.java @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.commons.protocol; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; + +public class DefaultProtocolSerializer implements ProtocolSerializer { + + @Override + public String serialize(ConfiguredAirbyteCatalog configuredAirbyteCatalog) { + return Jsons.serialize(configuredAirbyteCatalog); + } + +} diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/ProtocolSerializer.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/ProtocolSerializer.java new file mode 100644 index 0000000000000..527697ebd1caf --- /dev/null +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/ProtocolSerializer.java @@ -0,0 +1,13 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.commons.protocol; + +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; + +public interface ProtocolSerializer { + + String serialize(final ConfiguredAirbyteCatalog configuredAirbyteCatalog); + +} diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/VersionedProtocolSerializer.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/VersionedProtocolSerializer.java new file mode 100644 index 0000000000000..a53c068a59843 --- /dev/null +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/VersionedProtocolSerializer.java @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.commons.protocol; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.version.Version; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; + +/** + * Serialize a ConfiguredAirbyteCatalog to the specified version + *

+ * This Serializer expects a ConfiguredAirbyteCatalog from the Current version of the platform, + * converts it to the target protocol version before serializing it. + */ +public class VersionedProtocolSerializer implements ProtocolSerializer { + + private final ConfiguredAirbyteCatalogMigrator configuredAirbyteCatalogMigrator; + private final Version protocolVersion; + + public VersionedProtocolSerializer(final ConfiguredAirbyteCatalogMigrator configuredAirbyteCatalogMigrator, final Version protocolVersion) { + this.configuredAirbyteCatalogMigrator = configuredAirbyteCatalogMigrator; + this.protocolVersion = protocolVersion; + } + + @Override + public String serialize(final ConfiguredAirbyteCatalog configuredAirbyteCatalog) { + return Jsons.serialize(configuredAirbyteCatalogMigrator.downgrade(configuredAirbyteCatalog, protocolVersion)); + } + +} diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigration.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigration.java index ce3746198bbe9..f193414da705b 100644 --- a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigration.java +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigration.java @@ -4,7 +4,8 @@ package io.airbyte.commons.protocol.migrations; -import io.airbyte.commons.version.Version; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.util.Optional; /** * AirbyteProtocol message migration interface @@ -12,32 +13,24 @@ * @param The Old AirbyteMessage type * @param The New AirbyteMessage type */ -public interface AirbyteMessageMigration { +public interface AirbyteMessageMigration extends Migration { /** * Downgrades a message to from the new version to the old version * * @param message: the message to downgrade + * @param configuredAirbyteCatalog: the ConfiguredAirbyteCatalog of the 
connection when applicable * @return the downgraded message */ - PreviousVersion downgrade(final CurrentVersion message); + PreviousVersion downgrade(final CurrentVersion message, final Optional configuredAirbyteCatalog); /** * Upgrades a message from the old version to the new version * * @param message: the message to upgrade + * @param configuredAirbyteCatalog: the ConfiguredAirbyteCatalog of the connection when applicable * @return the upgrade message */ - CurrentVersion upgrade(final PreviousVersion message); - - /** - * The Old version, note that due to semver, the important piece of information is the Major. - */ - Version getPreviousVersion(); - - /** - * The New version, note that due to semver, the important piece of information is the Major. - */ - Version getCurrentVersion(); + CurrentVersion upgrade(final PreviousVersion message, final Optional configuredAirbyteCatalog); } diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV1.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV1.java index 501fca10c464c..b48a373f93297 100644 --- a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV1.java +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV1.java @@ -7,8 +7,10 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.commons.version.AirbyteProtocolVersion; import io.airbyte.commons.version.Version; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.v0.AirbyteMessage; import jakarta.inject.Singleton; +import java.util.Optional; /** * Placeholder AirbyteMessage Migration from v0 to v1 @@ -17,12 +19,14 @@ public class AirbyteMessageMigrationV1 implements AirbyteMessageMigration { @Override - public AirbyteMessage downgrade(io.airbyte.protocol.models.AirbyteMessage message) { + public 
AirbyteMessage downgrade(final io.airbyte.protocol.models.AirbyteMessage message, + final Optional configuredAirbyteCatalog) { return Jsons.object(Jsons.jsonNode(message), AirbyteMessage.class); } @Override - public io.airbyte.protocol.models.AirbyteMessage upgrade(AirbyteMessage message) { + public io.airbyte.protocol.models.AirbyteMessage upgrade(final AirbyteMessage message, + final Optional configuredAirbyteCatalog) { return Jsons.object(Jsons.jsonNode(message), io.airbyte.protocol.models.AirbyteMessage.class); } diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/ConfiguredAirbyteCatalogMigration.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/ConfiguredAirbyteCatalogMigration.java new file mode 100644 index 0000000000000..fa3e32862d876 --- /dev/null +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/ConfiguredAirbyteCatalogMigration.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.commons.protocol.migrations; + +public interface ConfiguredAirbyteCatalogMigration extends Migration { + + /** + * Downgrades a ConfiguredAirbyteCatalog from the new version to the old version + * + * @param message: the ConfiguredAirbyteCatalog to downgrade + * @return the downgraded ConfiguredAirbyteCatalog + */ + PreviousVersion downgrade(final CurrentVersion message); + + /** + * Upgrades a ConfiguredAirbyteCatalog from the old version to the new version + * + * @param message: the ConfiguredAirbyteCatalog to upgrade + * @return the upgraded ConfiguredAirbyteCatalog + */ + CurrentVersion upgrade(final PreviousVersion message); + +} diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/ConfiguredAirbyteCatalogMigrationV1.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/ConfiguredAirbyteCatalogMigrationV1.java new file mode 100644 index 0000000000000..c2293d740da34 --- /dev/null +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/ConfiguredAirbyteCatalogMigrationV1.java @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.commons.protocol.migrations; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.version.AirbyteProtocolVersion; +import io.airbyte.commons.version.Version; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import jakarta.inject.Singleton; + +@Singleton +public class ConfiguredAirbyteCatalogMigrationV1 + implements ConfiguredAirbyteCatalogMigration { + + @Override + public ConfiguredAirbyteCatalog downgrade(io.airbyte.protocol.models.ConfiguredAirbyteCatalog message) { + return Jsons.object(Jsons.jsonNode(message), ConfiguredAirbyteCatalog.class); + } + + @Override + public io.airbyte.protocol.models.ConfiguredAirbyteCatalog upgrade(ConfiguredAirbyteCatalog message) { + return Jsons.object(Jsons.jsonNode(message), io.airbyte.protocol.models.ConfiguredAirbyteCatalog.class); + } + + @Override + public Version getPreviousVersion() { + return AirbyteProtocolVersion.V0; + } + + @Override + public Version getCurrentVersion() { + return AirbyteProtocolVersion.V1; + } + +} diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/Migration.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/Migration.java new file mode 100644 index 0000000000000..8a21f8a9ab2fc --- /dev/null +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/Migration.java @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.commons.protocol.migrations; + +import io.airbyte.commons.version.Version; + +public interface Migration { + + /** + * The Old version, note that due to semver, the important piece of information is the Major. + */ + Version getPreviousVersion(); + + /** + * The New version, note that due to semver, the important piece of information is the Major. 
+ */ + Version getCurrentVersion(); + +} diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/MigrationContainer.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/MigrationContainer.java new file mode 100644 index 0000000000000..6a317b03bd80e --- /dev/null +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/MigrationContainer.java @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.commons.protocol.migrations; + +import io.airbyte.commons.version.Version; +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.function.BiFunction; + +public class MigrationContainer { + + private final List migrationsToRegister; + private final SortedMap migrations = new TreeMap<>(); + private String mostRecentMajorVersion = ""; + + public MigrationContainer(final List migrations) { + this.migrationsToRegister = migrations; + } + + public void initialize() { + migrationsToRegister.forEach(this::registerMigration); + } + + public Version getMostRecentVersion() { + return new Version(mostRecentMajorVersion, "0", "0"); + } + + /** + * Downgrade a message from the most recent version to the target version by chaining all the + * required migrations + */ + public PreviousVersion downgrade(final CurrentVersion message, + final Version target, + final BiFunction applyDowngrade) { + if (target.getMajorVersion().equals(mostRecentMajorVersion)) { + return (PreviousVersion) message; + } + + Object result = message; + Object[] selectedMigrations = selectMigrations(target).toArray(); + for (int i = selectedMigrations.length; i > 0; --i) { + result = applyDowngrade.apply((T) selectedMigrations[i - 1], result); + } + return (PreviousVersion) result; + } + + /** + * Upgrade a message from the source version to the most recent version by chaining 
all the required + * migrations + */ + public CurrentVersion upgrade(final PreviousVersion message, + final Version source, + final BiFunction applyUpgrade) { + if (source.getMajorVersion().equals(mostRecentMajorVersion)) { + return (CurrentVersion) message; + } + + Object result = message; + for (var migration : selectMigrations(source)) { + result = applyUpgrade.apply(migration, result); + } + return (CurrentVersion) result; + } + + public Collection selectMigrations(final Version version) { + final Collection results = migrations.tailMap(version.getMajorVersion()).values(); + if (results.isEmpty()) { + throw new RuntimeException("Unsupported migration version " + version.serialize()); + } + return results; + } + + /** + * Store migration in a sorted map keyed by the major of the lower version of the migration. + * + * The goal is to be able to retrieve the list of migrations to apply to get to/from a given + * version. We are only keying on the lower version because the right side (most recent version of + * the migration range) is always the current version. 
+ */ + private void registerMigration(final T migration) { + final String key = migration.getPreviousVersion().getMajorVersion(); + if (!migrations.containsKey(key)) { + migrations.put(key, migration); + if (migration.getCurrentVersion().getMajorVersion().compareTo(mostRecentMajorVersion) > 0) { + mostRecentMajorVersion = migration.getCurrentVersion().getMajorVersion(); + } + } else { + throw new RuntimeException("Trying to register a duplicated migration " + migration.getClass().getName()); + } + } + + public Set getMigrationKeys() { + return migrations.keySet(); + } + +} diff --git a/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/AirbyteMessageMigratorMicronautTest.java b/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/AirbyteMessageMigratorMicronautTest.java deleted file mode 100644 index a2d5556d99cd5..0000000000000 --- a/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/AirbyteMessageMigratorMicronautTest.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.commons.protocol; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import io.micronaut.test.extensions.junit5.annotation.MicronautTest; -import jakarta.inject.Inject; -import java.util.HashSet; -import java.util.List; -import org.junit.jupiter.api.Test; - -@MicronautTest -class AirbyteMessageMigratorMicronautTest { - - @Inject - AirbyteMessageMigrator messageMigrator; - - @Test - void testMigrationInjection() { - // This should contain the list of all the supported majors of the airbyte protocol except the most - // recent one since the migrations themselves are keyed on the lower version. 
- assertEquals(new HashSet<>(List.of("0")), messageMigrator.getMigrationKeys()); - } - -} diff --git a/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/AirbyteMessageMigratorTest.java b/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/AirbyteMessageMigratorTest.java index c609b2f27b511..c11a33f66cc0f 100644 --- a/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/AirbyteMessageMigratorTest.java +++ b/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/AirbyteMessageMigratorTest.java @@ -9,6 +9,9 @@ import io.airbyte.commons.protocol.migrations.AirbyteMessageMigration; import io.airbyte.commons.version.Version; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.util.List; +import java.util.Optional; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -27,12 +30,12 @@ record ObjectV2(String name2) {} static class Migrate0to1 implements AirbyteMessageMigration { @Override - public ObjectV0 downgrade(ObjectV1 message) { + public ObjectV0 downgrade(ObjectV1 message, Optional configuredAirbyteCatalog) { return new ObjectV0(message.name1); } @Override - public ObjectV1 upgrade(ObjectV0 message) { + public ObjectV1 upgrade(ObjectV0 message, Optional configuredAirbyteCatalog) { return new ObjectV1(message.name0); } @@ -51,12 +54,12 @@ public Version getCurrentVersion() { static class Migrate1to2 implements AirbyteMessageMigration { @Override - public ObjectV1 downgrade(ObjectV2 message) { + public ObjectV1 downgrade(ObjectV2 message, Optional configuredAirbyteCatalog) { return new ObjectV1(message.name2); } @Override - public ObjectV2 upgrade(ObjectV1 message) { + public ObjectV2 upgrade(ObjectV1 message, Optional configuredAirbyteCatalog) { return new ObjectV2(message.name1); } @@ -76,58 +79,60 @@ public Version getCurrentVersion() { @BeforeEach void beforeEach() { - migrator = new AirbyteMessageMigrator(); - migrator.registerMigration(new Migrate0to1()); - 
migrator.registerMigration(new Migrate1to2()); + migrator = new AirbyteMessageMigrator( + List.of(new Migrate0to1(), new Migrate1to2())); + migrator.initialize(); } @Test void testDowngrade() { final ObjectV2 obj = new ObjectV2("my name"); - final ObjectV0 objDowngradedTo0 = migrator.downgrade(obj, v0); + final ObjectV0 objDowngradedTo0 = migrator.downgrade(obj, v0, Optional.empty()); assertEquals(obj.name2, objDowngradedTo0.name0); - final ObjectV1 objDowngradedTo1 = migrator.downgrade(obj, v1); + final ObjectV1 objDowngradedTo1 = migrator.downgrade(obj, v1, Optional.empty()); assertEquals(obj.name2, objDowngradedTo1.name1); - final ObjectV2 objDowngradedTo2 = migrator.downgrade(obj, v2); + final ObjectV2 objDowngradedTo2 = migrator.downgrade(obj, v2, Optional.empty()); assertEquals(obj.name2, objDowngradedTo2.name2); } @Test void testUpgrade() { final ObjectV0 obj0 = new ObjectV0("my name 0"); - final ObjectV2 objUpgradedFrom0 = migrator.upgrade(obj0, v0); + final ObjectV2 objUpgradedFrom0 = migrator.upgrade(obj0, v0, Optional.empty()); assertEquals(obj0.name0, objUpgradedFrom0.name2); final ObjectV1 obj1 = new ObjectV1("my name 1"); - final ObjectV2 objUpgradedFrom1 = migrator.upgrade(obj1, v1); + final ObjectV2 objUpgradedFrom1 = migrator.upgrade(obj1, v1, Optional.empty()); assertEquals(obj1.name1, objUpgradedFrom1.name2); final ObjectV2 obj2 = new ObjectV2("my name 2"); - final ObjectV2 objUpgradedFrom2 = migrator.upgrade(obj2, v2); + final ObjectV2 objUpgradedFrom2 = migrator.upgrade(obj2, v2, Optional.empty()); assertEquals(obj2.name2, objUpgradedFrom2.name2); } @Test void testUnsupportedDowngradeShouldFailExplicitly() { assertThrows(RuntimeException.class, () -> { - migrator.downgrade(new ObjectV2("woot"), new Version("5.0.0")); + migrator.downgrade(new ObjectV2("woot"), new Version("5.0.0"), Optional.empty()); }); } @Test void testUnsupportedUpgradeShouldFailExplicitly() { assertThrows(RuntimeException.class, () -> { - migrator.upgrade(new 
ObjectV0("woot"), new Version("4.0.0")); + migrator.upgrade(new ObjectV0("woot"), new Version("4.0.0"), Optional.empty()); }); } @Test void testRegisterCollisionsShouldFail() { assertThrows(RuntimeException.class, () -> { - migrator.registerMigration(new Migrate0to1()); + migrator = new AirbyteMessageMigrator( + List.of(new Migrate0to1(), new Migrate1to2(), new Migrate0to1())); + migrator.initialize(); }); } diff --git a/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/MigratorsMicronautTest.java b/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/MigratorsMicronautTest.java new file mode 100644 index 0000000000000..1c52ed7c8a451 --- /dev/null +++ b/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/MigratorsMicronautTest.java @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.commons.protocol; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import io.micronaut.test.extensions.junit5.annotation.MicronautTest; +import jakarta.inject.Inject; +import java.util.Set; +import org.junit.jupiter.api.Test; + +@MicronautTest +class MigratorsMicronautTest { + + @Inject + AirbyteMessageMigrator messageMigrator; + + @Inject + ConfiguredAirbyteCatalogMigrator configuredAirbyteCatalogMigrator; + + // This should contain the list of all the supported majors of the airbyte protocol except the most + // recent one since the migrations themselves are keyed on the lower version. 
+ final Set SUPPORTED_VERSIONS = Set.of("0"); + + @Test + void testAirbyteMessageMigrationInjection() { + assertEquals(SUPPORTED_VERSIONS, messageMigrator.getMigrationKeys()); + } + + @Test + void testConfiguredAirbyteCatalogMigrationInjection() { + assertEquals(SUPPORTED_VERSIONS, configuredAirbyteCatalogMigrator.getMigrationKeys()); + } + +} diff --git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/DefaultAirbyteDestination.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/DefaultAirbyteDestination.java index 92350ccfaba10..117867d568f7d 100644 --- a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/DefaultAirbyteDestination.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/DefaultAirbyteDestination.java @@ -15,6 +15,8 @@ import io.airbyte.commons.logging.LoggingHelper.Color; import io.airbyte.commons.logging.MdcScope; import io.airbyte.commons.logging.MdcScope.Builder; +import io.airbyte.commons.protocol.DefaultProtocolSerializer; +import io.airbyte.commons.protocol.ProtocolSerializer; import io.airbyte.config.WorkerDestinationConfig; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; @@ -49,6 +51,7 @@ public class DefaultAirbyteDestination implements AirbyteDestination { private final IntegrationLauncher integrationLauncher; private final AirbyteStreamFactory streamFactory; private final AirbyteMessageBufferedWriterFactory messageWriterFactory; + private final ProtocolSerializer protocolSerializer; private final AtomicBoolean inputHasEnded = new AtomicBoolean(false); @@ -58,16 +61,19 @@ public class DefaultAirbyteDestination implements AirbyteDestination { private Integer exitValue = null; public DefaultAirbyteDestination(final IntegrationLauncher integrationLauncher) { - this(integrationLauncher, new DefaultAirbyteStreamFactory(CONTAINER_LOG_MDC_BUILDER), new DefaultAirbyteMessageBufferedWriterFactory()); + 
this(integrationLauncher, new DefaultAirbyteStreamFactory(CONTAINER_LOG_MDC_BUILDER), new DefaultAirbyteMessageBufferedWriterFactory(), + new DefaultProtocolSerializer()); } public DefaultAirbyteDestination(final IntegrationLauncher integrationLauncher, final AirbyteStreamFactory streamFactory, - final AirbyteMessageBufferedWriterFactory messageWriterFactory) { + final AirbyteMessageBufferedWriterFactory messageWriterFactory, + final ProtocolSerializer protocolSerializer) { this.integrationLauncher = integrationLauncher; this.streamFactory = streamFactory; this.messageWriterFactory = messageWriterFactory; + this.protocolSerializer = protocolSerializer; } @Trace(operationName = WORKER_OPERATION_NAME) @@ -81,7 +87,7 @@ public void start(final WorkerDestinationConfig destinationConfig, final Path jo WorkerConstants.DESTINATION_CONFIG_JSON_FILENAME, Jsons.serialize(destinationConfig.getDestinationConnectionConfiguration()), WorkerConstants.DESTINATION_CATALOG_JSON_FILENAME, - Jsons.serialize(destinationConfig.getCatalog())); + protocolSerializer.serialize(destinationConfig.getCatalog())); // stdout logs are logged elsewhere since stdout also contains data LineGobbler.gobble(destinationProcess.getErrorStream(), LOGGER::error, "airbyte-destination", CONTAINER_LOG_MDC_BUILDER); diff --git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/DefaultAirbyteSource.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/DefaultAirbyteSource.java index f834c33107fd4..4cadb5c14bdcd 100644 --- a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/DefaultAirbyteSource.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/DefaultAirbyteSource.java @@ -16,6 +16,8 @@ import io.airbyte.commons.logging.LoggingHelper.Color; import io.airbyte.commons.logging.MdcScope; import io.airbyte.commons.logging.MdcScope.Builder; +import io.airbyte.commons.protocol.DefaultProtocolSerializer; +import 
io.airbyte.commons.protocol.ProtocolSerializer; import io.airbyte.config.WorkerSourceConfig; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; @@ -51,6 +53,7 @@ public class DefaultAirbyteSource implements AirbyteSource { private final IntegrationLauncher integrationLauncher; private final AirbyteStreamFactory streamFactory; + private final ProtocolSerializer protocolSerializer; private final HeartbeatMonitor heartbeatMonitor; private Process sourceProcess = null; @@ -59,19 +62,23 @@ public class DefaultAirbyteSource implements AirbyteSource { private final boolean logConnectorMessages = new EnvVariableFeatureFlags().logConnectorMessages(); public DefaultAirbyteSource(final IntegrationLauncher integrationLauncher) { - this(integrationLauncher, new DefaultAirbyteStreamFactory(CONTAINER_LOG_MDC_BUILDER)); + this(integrationLauncher, new DefaultAirbyteStreamFactory(CONTAINER_LOG_MDC_BUILDER), new DefaultProtocolSerializer()); } - public DefaultAirbyteSource(final IntegrationLauncher integrationLauncher, final AirbyteStreamFactory streamFactory) { - this(integrationLauncher, streamFactory, new HeartbeatMonitor(HEARTBEAT_FRESH_DURATION)); + public DefaultAirbyteSource(final IntegrationLauncher integrationLauncher, + final AirbyteStreamFactory streamFactory, + final ProtocolSerializer protocolSerializer) { + this(integrationLauncher, streamFactory, protocolSerializer, new HeartbeatMonitor(HEARTBEAT_FRESH_DURATION)); } @VisibleForTesting DefaultAirbyteSource(final IntegrationLauncher integrationLauncher, final AirbyteStreamFactory streamFactory, + final ProtocolSerializer protocolSerializer, final HeartbeatMonitor heartbeatMonitor) { this.integrationLauncher = integrationLauncher; this.streamFactory = streamFactory; + this.protocolSerializer = protocolSerializer; this.heartbeatMonitor = heartbeatMonitor; } @@ -84,8 +91,9 @@ public void start(final WorkerSourceConfig sourceConfig, final Path jobRoot) thr 
WorkerConstants.SOURCE_CONFIG_JSON_FILENAME, Jsons.serialize(sourceConfig.getSourceConnectionConfiguration()), WorkerConstants.SOURCE_CATALOG_JSON_FILENAME, - Jsons.serialize(sourceConfig.getCatalog()), + protocolSerializer.serialize(sourceConfig.getCatalog()), sourceConfig.getState() == null ? null : WorkerConstants.INPUT_STATE_JSON_FILENAME, + // TODO We should be passing a typed state here and use the protocolSerializer sourceConfig.getState() == null ? null : Jsons.serialize(sourceConfig.getState().getState())); // stdout logs are logged elsewhere since stdout also contains data LineGobbler.gobble(sourceProcess.getErrorStream(), LOGGER::error, "airbyte-source", CONTAINER_LOG_MDC_BUILDER); diff --git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteMessageBufferedWriter.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteMessageBufferedWriter.java index e1b9b25a4b92e..533274e8913a5 100644 --- a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteMessageBufferedWriter.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteMessageBufferedWriter.java @@ -7,25 +7,30 @@ import io.airbyte.commons.protocol.AirbyteMessageVersionedMigrator; import io.airbyte.commons.protocol.serde.AirbyteMessageSerializer; import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import java.io.BufferedWriter; import java.io.IOException; +import java.util.Optional; public class VersionedAirbyteMessageBufferedWriter extends DefaultAirbyteMessageBufferedWriter { private final AirbyteMessageSerializer serializer; private final AirbyteMessageVersionedMigrator migrator; + private final Optional configuredAirbyteCatalog; public VersionedAirbyteMessageBufferedWriter(final BufferedWriter writer, final AirbyteMessageSerializer serializer, - final AirbyteMessageVersionedMigrator migrator) { + final 
AirbyteMessageVersionedMigrator migrator, + final Optional configuredAirbyteCatalog) { super(writer); this.serializer = serializer; this.migrator = migrator; + this.configuredAirbyteCatalog = configuredAirbyteCatalog; } @Override public void write(final AirbyteMessage message) throws IOException { - final T downgradedMessage = migrator.downgrade(message); + final T downgradedMessage = migrator.downgrade(message, configuredAirbyteCatalog); writer.write(serializer.serialize(downgradedMessage)); writer.newLine(); } diff --git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteMessageBufferedWriterFactory.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteMessageBufferedWriterFactory.java index 3b2a2c8f0a56a..c10a0dc4e45ad 100644 --- a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteMessageBufferedWriterFactory.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteMessageBufferedWriterFactory.java @@ -5,9 +5,11 @@ package io.airbyte.workers.internal; import io.airbyte.commons.protocol.AirbyteMessageSerDeProvider; -import io.airbyte.commons.protocol.AirbyteMessageVersionedMigratorFactory; +import io.airbyte.commons.protocol.AirbyteProtocolVersionedMigratorFactory; import io.airbyte.commons.version.Version; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import java.io.BufferedWriter; +import java.util.Optional; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -16,15 +18,18 @@ public class VersionedAirbyteMessageBufferedWriterFactory implements AirbyteMess private static final Logger LOGGER = LoggerFactory.getLogger(VersionedAirbyteMessageBufferedWriterFactory.class); private final AirbyteMessageSerDeProvider serDeProvider; - private final AirbyteMessageVersionedMigratorFactory migratorFactory; + private final AirbyteProtocolVersionedMigratorFactory migratorFactory; private final Version protocolVersion; + 
private final Optional configuredAirbyteCatalog; public VersionedAirbyteMessageBufferedWriterFactory(final AirbyteMessageSerDeProvider serDeProvider, - final AirbyteMessageVersionedMigratorFactory migratorFactory, - final Version protocolVersion) { + final AirbyteProtocolVersionedMigratorFactory migratorFactory, + final Version protocolVersion, + final Optional configuredAirbyteCatalog) { this.serDeProvider = serDeProvider; this.migratorFactory = migratorFactory; this.protocolVersion = protocolVersion; + this.configuredAirbyteCatalog = configuredAirbyteCatalog; } @Override @@ -37,7 +42,8 @@ public AirbyteMessageBufferedWriter createWriter(BufferedWriter bufferedWriter) return new VersionedAirbyteMessageBufferedWriter<>( bufferedWriter, serDeProvider.getSerializer(protocolVersion).orElseThrow(), - migratorFactory.getVersionedMigrator(protocolVersion)); + migratorFactory.getAirbyteMessageMigrator(protocolVersion), + configuredAirbyteCatalog); } } diff --git a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactory.java b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactory.java index da35ca9c327ed..d756a2e582f5f 100644 --- a/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactory.java +++ b/airbyte-commons-worker/src/main/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactory.java @@ -13,10 +13,11 @@ import io.airbyte.commons.logging.MdcScope; import io.airbyte.commons.protocol.AirbyteMessageSerDeProvider; import io.airbyte.commons.protocol.AirbyteMessageVersionedMigrator; -import io.airbyte.commons.protocol.AirbyteMessageVersionedMigratorFactory; +import io.airbyte.commons.protocol.AirbyteProtocolVersionedMigratorFactory; import io.airbyte.commons.protocol.serde.AirbyteMessageDeserializer; import io.airbyte.commons.version.Version; import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; 
import java.io.BufferedReader; import java.io.IOException; import java.util.Optional; @@ -45,7 +46,8 @@ public class VersionedAirbyteStreamFactory extends DefaultAirbyteStreamFactor private static final String TYPE_FIELD_NAME = "type"; private final AirbyteMessageSerDeProvider serDeProvider; - private final AirbyteMessageVersionedMigratorFactory migratorFactory; + private final AirbyteProtocolVersionedMigratorFactory migratorFactory; + private final Optional configuredAirbyteCatalog; private AirbyteMessageDeserializer deserializer; private AirbyteMessageVersionedMigrator migrator; private Version protocolVersion; @@ -53,20 +55,23 @@ public class VersionedAirbyteStreamFactory extends DefaultAirbyteStreamFactor private boolean shouldDetectVersion = false; public VersionedAirbyteStreamFactory(final AirbyteMessageSerDeProvider serDeProvider, - final AirbyteMessageVersionedMigratorFactory migratorFactory, - final Version protocolVersion) { - this(serDeProvider, migratorFactory, protocolVersion, MdcScope.DEFAULT_BUILDER); + final AirbyteProtocolVersionedMigratorFactory migratorFactory, + final Version protocolVersion, + final Optional configuredAirbyteCatalog) { + this(serDeProvider, migratorFactory, protocolVersion, configuredAirbyteCatalog, MdcScope.DEFAULT_BUILDER); } public VersionedAirbyteStreamFactory(final AirbyteMessageSerDeProvider serDeProvider, - final AirbyteMessageVersionedMigratorFactory migratorFactory, + final AirbyteProtocolVersionedMigratorFactory migratorFactory, final Version protocolVersion, + final Optional configuredAirbyteCatalog, final MdcScope.Builder containerLogMdcBuilder) { // TODO AirbyteProtocolPredicate needs to be updated to be protocol version aware super(new AirbyteProtocolPredicate(), LOGGER, containerLogMdcBuilder); Preconditions.checkNotNull(protocolVersion); this.serDeProvider = serDeProvider; this.migratorFactory = migratorFactory; + this.configuredAirbyteCatalog = configuredAirbyteCatalog; 
this.initializeForProtocolVersion(protocolVersion); } @@ -160,14 +165,14 @@ public VersionedAirbyteStreamFactory withDetectVersion(final boolean detectVe final protected void initializeForProtocolVersion(final Version protocolVersion) { this.deserializer = (AirbyteMessageDeserializer) serDeProvider.getDeserializer(protocolVersion).orElseThrow(); - this.migrator = migratorFactory.getVersionedMigrator(protocolVersion); + this.migrator = migratorFactory.getAirbyteMessageMigrator(protocolVersion); this.protocolVersion = protocolVersion; } @Override protected Stream toAirbyteMessage(final JsonNode json) { try { - final AirbyteMessage message = migrator.upgrade(deserializer.deserialize(json)); + final AirbyteMessage message = migrator.upgrade(deserializer.deserialize(json), configuredAirbyteCatalog); return Stream.of(message); } catch (final RuntimeException e) { logger.warn("Failed to upgrade a message from version {}: {}", protocolVersion, Jsons.serialize(json), e); diff --git a/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/DefaultAirbyteDestinationTest.java b/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/DefaultAirbyteDestinationTest.java index 5568660d7a528..f7a2adf44731f 100644 --- a/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/DefaultAirbyteDestinationTest.java +++ b/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/DefaultAirbyteDestinationTest.java @@ -20,6 +20,8 @@ import io.airbyte.commons.io.IOs; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.logging.LoggingHelper.Color; +import io.airbyte.commons.protocol.DefaultProtocolSerializer; +import io.airbyte.commons.protocol.ProtocolSerializer; import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.WorkerDestinationConfig; import io.airbyte.config.helpers.LogClientSingleton; @@ -80,6 +82,7 @@ class DefaultAirbyteDestinationTest { private Process process; private AirbyteStreamFactory streamFactory; private 
AirbyteMessageBufferedWriterFactory messageWriterFactory; + private final ProtocolSerializer protocolSerializer = new DefaultProtocolSerializer(); private ByteArrayOutputStream outputStream; @BeforeEach @@ -122,7 +125,8 @@ void tearDown() throws IOException { @SuppressWarnings("BusyWait") @Test void testSuccessfulLifecycle() throws Exception { - final AirbyteDestination destination = new DefaultAirbyteDestination(integrationLauncher, streamFactory, messageWriterFactory); + final AirbyteDestination destination = + new DefaultAirbyteDestination(integrationLauncher, streamFactory, messageWriterFactory, protocolSerializer); destination.start(DESTINATION_CONFIG, jobRoot); final AirbyteMessage recordMessage = AirbyteMessageUtils.createRecordMessage(STREAM_NAME, FIELD_NAME, "blue"); @@ -161,7 +165,8 @@ void testSuccessfulLifecycle() throws Exception { @Test void testTaggedLogs() throws Exception { - final AirbyteDestination destination = new DefaultAirbyteDestination(integrationLauncher, streamFactory, messageWriterFactory); + final AirbyteDestination destination = + new DefaultAirbyteDestination(integrationLauncher, streamFactory, messageWriterFactory, protocolSerializer); destination.start(DESTINATION_CONFIG, jobRoot); final AirbyteMessage recordMessage = AirbyteMessageUtils.createRecordMessage(STREAM_NAME, FIELD_NAME, "blue"); diff --git a/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/DefaultAirbyteSourceTest.java b/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/DefaultAirbyteSourceTest.java index 76e1eba9843cd..13b1bed012469 100644 --- a/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/DefaultAirbyteSourceTest.java +++ b/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/DefaultAirbyteSourceTest.java @@ -20,6 +20,8 @@ import io.airbyte.commons.io.IOs; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.logging.LoggingHelper.Color; +import 
io.airbyte.commons.protocol.DefaultProtocolSerializer; +import io.airbyte.commons.protocol.ProtocolSerializer; import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.State; import io.airbyte.config.WorkerSourceConfig; @@ -94,6 +96,7 @@ class DefaultAirbyteSourceTest { private Process process; private AirbyteStreamFactory streamFactory; private HeartbeatMonitor heartbeatMonitor; + private final ProtocolSerializer protocolSerializer = new DefaultProtocolSerializer(); @BeforeEach void setup() throws IOException, WorkerException { @@ -137,7 +140,7 @@ void testSuccessfulLifecycle() throws Exception { when(heartbeatMonitor.isBeating()).thenReturn(true).thenReturn(false); - final AirbyteSource source = new DefaultAirbyteSource(integrationLauncher, streamFactory, heartbeatMonitor); + final AirbyteSource source = new DefaultAirbyteSource(integrationLauncher, streamFactory, protocolSerializer, heartbeatMonitor); source.start(SOURCE_CONFIG, jobRoot); final List messages = Lists.newArrayList(); @@ -172,8 +175,7 @@ void testTaggedLogs() throws Exception { when(heartbeatMonitor.isBeating()).thenReturn(true).thenReturn(false); - final AirbyteSource source = new DefaultAirbyteSource(integrationLauncher, streamFactory, - heartbeatMonitor); + final AirbyteSource source = new DefaultAirbyteSource(integrationLauncher, streamFactory, protocolSerializer, heartbeatMonitor); source.start(SOURCE_CONFIG, jobRoot); final List messages = Lists.newArrayList(); @@ -198,7 +200,7 @@ void testTaggedLogs() throws Exception { @Test void testNonzeroExitCodeThrows() throws Exception { - final AirbyteSource tap = new DefaultAirbyteSource(integrationLauncher, streamFactory, heartbeatMonitor); + final AirbyteSource tap = new DefaultAirbyteSource(integrationLauncher, streamFactory, protocolSerializer, heartbeatMonitor); tap.start(SOURCE_CONFIG, jobRoot); when(process.exitValue()).thenReturn(1); @@ -208,7 +210,7 @@ void testNonzeroExitCodeThrows() throws Exception { @Test void 
testIgnoredExitCodes() throws Exception { - final AirbyteSource tap = new DefaultAirbyteSource(integrationLauncher, streamFactory, heartbeatMonitor); + final AirbyteSource tap = new DefaultAirbyteSource(integrationLauncher, streamFactory, protocolSerializer, heartbeatMonitor); tap.start(SOURCE_CONFIG, jobRoot); when(process.isAlive()).thenReturn(false); @@ -220,7 +222,7 @@ void testIgnoredExitCodes() throws Exception { @Test void testGetExitValue() throws Exception { - final AirbyteSource source = new DefaultAirbyteSource(integrationLauncher, streamFactory, heartbeatMonitor); + final AirbyteSource source = new DefaultAirbyteSource(integrationLauncher, streamFactory, protocolSerializer, heartbeatMonitor); source.start(SOURCE_CONFIG, jobRoot); when(process.isAlive()).thenReturn(false); diff --git a/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactoryTest.java b/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactoryTest.java index b692938e84e0c..b6b275548f241 100644 --- a/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactoryTest.java +++ b/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactoryTest.java @@ -10,8 +10,10 @@ import io.airbyte.commons.protocol.AirbyteMessageMigrator; import io.airbyte.commons.protocol.AirbyteMessageSerDeProvider; -import io.airbyte.commons.protocol.AirbyteMessageVersionedMigratorFactory; +import io.airbyte.commons.protocol.AirbyteProtocolVersionedMigratorFactory; +import io.airbyte.commons.protocol.ConfiguredAirbyteCatalogMigrator; import io.airbyte.commons.protocol.migrations.AirbyteMessageMigrationV1; +import io.airbyte.commons.protocol.migrations.ConfiguredAirbyteCatalogMigrationV1; import io.airbyte.commons.protocol.serde.AirbyteMessageV0Deserializer; import io.airbyte.commons.protocol.serde.AirbyteMessageV0Serializer; import 
io.airbyte.commons.protocol.serde.AirbyteMessageV1Deserializer; @@ -23,6 +25,7 @@ import java.io.StringReader; import java.nio.charset.Charset; import java.util.List; +import java.util.Optional; import java.util.stream.Stream; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -31,7 +34,7 @@ class VersionedAirbyteStreamFactoryTest { AirbyteMessageSerDeProvider serDeProvider; - AirbyteMessageVersionedMigratorFactory migratorFactory; + AirbyteProtocolVersionedMigratorFactory migratorFactory; final static Version defaultVersion = new Version("0.2.0"); @@ -41,29 +44,34 @@ void beforeEach() { List.of(new AirbyteMessageV0Deserializer(), new AirbyteMessageV1Deserializer()), List.of(new AirbyteMessageV0Serializer(), new AirbyteMessageV1Serializer()))); serDeProvider.initialize(); - final AirbyteMessageMigrator migrator = new AirbyteMessageMigrator( + final AirbyteMessageMigrator airbyteMessageMigrator = new AirbyteMessageMigrator( List.of(new AirbyteMessageMigrationV1())); - migrator.initialize(); - migratorFactory = spy(new AirbyteMessageVersionedMigratorFactory(migrator)); + airbyteMessageMigrator.initialize(); + final ConfiguredAirbyteCatalogMigrator configuredAirbyteCatalogMigrator = new ConfiguredAirbyteCatalogMigrator( + List.of(new ConfiguredAirbyteCatalogMigrationV1())); + configuredAirbyteCatalogMigrator.initialize(); + migratorFactory = spy(new AirbyteProtocolVersionedMigratorFactory(airbyteMessageMigrator, configuredAirbyteCatalogMigrator)); } @Test void testCreate() { final Version initialVersion = new Version("0.1.2"); - final VersionedAirbyteStreamFactory streamFactory = new VersionedAirbyteStreamFactory<>(serDeProvider, migratorFactory, initialVersion); + final VersionedAirbyteStreamFactory streamFactory = + new VersionedAirbyteStreamFactory<>(serDeProvider, migratorFactory, initialVersion, Optional.empty()); final BufferedReader bufferedReader = new BufferedReader(new StringReader("")); streamFactory.create(bufferedReader); 
verify(serDeProvider).getDeserializer(initialVersion); - verify(migratorFactory).getVersionedMigrator(initialVersion); + verify(migratorFactory).getAirbyteMessageMigrator(initialVersion); } @Test void testCreateWithVersionDetection() { final Version initialVersion = new Version("0.0.0"); - final VersionedAirbyteStreamFactory streamFactory = new VersionedAirbyteStreamFactory<>(serDeProvider, migratorFactory, initialVersion) - .withDetectVersion(true); + final VersionedAirbyteStreamFactory streamFactory = + new VersionedAirbyteStreamFactory<>(serDeProvider, migratorFactory, initialVersion, Optional.empty()) + .withDetectVersion(true); final BufferedReader bufferedReader = getBuffereredReader("version-detection/logs-with-version.jsonl"); @@ -78,8 +86,9 @@ void testCreateWithVersionDetection() { @Test void testCreateWithVersionDetectionFallback() { final Version initialVersion = new Version("0.0.6"); - final VersionedAirbyteStreamFactory streamFactory = new VersionedAirbyteStreamFactory<>(serDeProvider, migratorFactory, initialVersion) - .withDetectVersion(true); + final VersionedAirbyteStreamFactory streamFactory = + new VersionedAirbyteStreamFactory<>(serDeProvider, migratorFactory, initialVersion, Optional.empty()) + .withDetectVersion(true); final BufferedReader bufferedReader = getBuffereredReader("version-detection/logs-without-version.jsonl"); @@ -94,8 +103,9 @@ void testCreateWithVersionDetectionFallback() { @Test void testCreateWithVersionDetectionWithoutSpecMessage() { final Version initialVersion = new Version("0.0.1"); - final VersionedAirbyteStreamFactory streamFactory = new VersionedAirbyteStreamFactory<>(serDeProvider, migratorFactory, initialVersion) - .withDetectVersion(true); + final VersionedAirbyteStreamFactory streamFactory = + new VersionedAirbyteStreamFactory<>(serDeProvider, migratorFactory, initialVersion, Optional.empty()) + .withDetectVersion(true); final BufferedReader bufferedReader = 
getBuffereredReader("version-detection/logs-without-spec-message.jsonl"); diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/config/ContainerOrchestratorFactory.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/config/ContainerOrchestratorFactory.java index 776bf81c73fac..519db8c0745bc 100644 --- a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/config/ContainerOrchestratorFactory.java +++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/config/ContainerOrchestratorFactory.java @@ -9,7 +9,7 @@ import io.airbyte.commons.features.EnvVariableFeatureFlags; import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.protocol.AirbyteMessageSerDeProvider; -import io.airbyte.commons.protocol.AirbyteMessageVersionedMigratorFactory; +import io.airbyte.commons.protocol.AirbyteProtocolVersionedMigratorFactory; import io.airbyte.commons.temporal.sync.OrchestratorConstants; import io.airbyte.config.EnvConfigs; import io.airbyte.container_orchestrator.orchestrator.DbtJobOrchestrator; @@ -101,7 +101,7 @@ JobOrchestrator jobOrchestrator( final FeatureFlags featureFlags, final WorkerConfigs workerConfigs, final AirbyteMessageSerDeProvider serdeProvider, - final AirbyteMessageVersionedMigratorFactory migratorFactory, + final AirbyteProtocolVersionedMigratorFactory migratorFactory, final JobRunConfig jobRunConfig, final SourceApi sourceApi, final DestinationApi destinationApi) { diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/orchestrator/ReplicationJobOrchestrator.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/orchestrator/ReplicationJobOrchestrator.java index e804ecb17adfe..71bf04be85259 100644 --- a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/orchestrator/ReplicationJobOrchestrator.java +++ 
b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/orchestrator/ReplicationJobOrchestrator.java @@ -17,7 +17,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.commons.logging.MdcScope; import io.airbyte.commons.protocol.AirbyteMessageSerDeProvider; -import io.airbyte.commons.protocol.AirbyteMessageVersionedMigratorFactory; +import io.airbyte.commons.protocol.AirbyteProtocolVersionedMigratorFactory; import io.airbyte.commons.temporal.TemporalUtils; import io.airbyte.commons.version.Version; import io.airbyte.config.Configs; @@ -28,6 +28,7 @@ import io.airbyte.metrics.lib.MetricEmittingApps; import io.airbyte.persistence.job.models.IntegrationLauncherConfig; import io.airbyte.persistence.job.models.JobRunConfig; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.workers.RecordSchemaValidator; import io.airbyte.workers.WorkerConstants; import io.airbyte.workers.WorkerMetricReporter; @@ -61,7 +62,7 @@ public class ReplicationJobOrchestrator implements JobOrchestrator runJob() throws Exception { WorkerConstants.RESET_JOB_SOURCE_DOCKER_IMAGE_STUB.equals(sourceLauncherConfig.getDockerImage()) ? 
new EmptyAirbyteSource( featureFlags.useStreamCapableState()) : new DefaultAirbyteSource(sourceLauncher, - getStreamFactory(sourceLauncherConfig.getProtocolVersion(), DefaultAirbyteSource.CONTAINER_LOG_MDC_BUILDER)); + getStreamFactory(sourceLauncherConfig.getProtocolVersion(), syncInput.getCatalog(), DefaultAirbyteSource.CONTAINER_LOG_MDC_BUILDER), + migratorFactory.getProtocolSerializer(sourceLauncherConfig.getProtocolVersion())); MetricClientFactory.initialize(MetricEmittingApps.WORKER); final var metricClient = MetricClientFactory.getMetricClient(); @@ -153,9 +155,12 @@ public Optional runJob() throws Exception { Math.toIntExact(jobRunConfig.getAttemptId()), airbyteSource, new NamespacingMapper(syncInput.getNamespaceDefinition(), syncInput.getNamespaceFormat(), syncInput.getPrefix()), - new DefaultAirbyteDestination(destinationLauncher, getStreamFactory(destinationLauncherConfig.getProtocolVersion(), - DefaultAirbyteDestination.CONTAINER_LOG_MDC_BUILDER), - new VersionedAirbyteMessageBufferedWriterFactory(serDeProvider, migratorFactory, destinationLauncherConfig.getProtocolVersion())), + new DefaultAirbyteDestination(destinationLauncher, + getStreamFactory(destinationLauncherConfig.getProtocolVersion(), syncInput.getCatalog(), + DefaultAirbyteDestination.CONTAINER_LOG_MDC_BUILDER), + new VersionedAirbyteMessageBufferedWriterFactory(serDeProvider, migratorFactory, destinationLauncherConfig.getProtocolVersion(), + Optional.of(syncInput.getCatalog())), + migratorFactory.getProtocolSerializer(destinationLauncherConfig.getProtocolVersion())), new AirbyteMessageTracker(), new RecordSchemaValidator(WorkerUtils.mapStreamNamesToSchemas(syncInput)), metricReporter, @@ -171,9 +176,11 @@ public Optional runJob() throws Exception { return Optional.of(Jsons.serialize(replicationOutput)); } - private AirbyteStreamFactory getStreamFactory(final Version protocolVersion, final MdcScope.Builder mdcScope) { + private AirbyteStreamFactory getStreamFactory(final Version 
protocolVersion, + final ConfiguredAirbyteCatalog configuredAirbyteCatalog, + final MdcScope.Builder mdcScope) { return protocolVersion != null - ? new VersionedAirbyteStreamFactory(serDeProvider, migratorFactory, protocolVersion, mdcScope) + ? new VersionedAirbyteStreamFactory<>(serDeProvider, migratorFactory, protocolVersion, Optional.of(configuredAirbyteCatalog), mdcScope) : new DefaultAirbyteStreamFactory(mdcScope); } diff --git a/airbyte-container-orchestrator/src/test/java/io/airbyte/container_orchestrator/config/ContainerOrchestratorFactoryTest.java b/airbyte-container-orchestrator/src/test/java/io/airbyte/container_orchestrator/config/ContainerOrchestratorFactoryTest.java index 1ece23705a9e6..903fa1b9726be 100644 --- a/airbyte-container-orchestrator/src/test/java/io/airbyte/container_orchestrator/config/ContainerOrchestratorFactoryTest.java +++ b/airbyte-container-orchestrator/src/test/java/io/airbyte/container_orchestrator/config/ContainerOrchestratorFactoryTest.java @@ -13,7 +13,7 @@ import io.airbyte.api.client.generated.SourceApi; import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.protocol.AirbyteMessageSerDeProvider; -import io.airbyte.commons.protocol.AirbyteMessageVersionedMigratorFactory; +import io.airbyte.commons.protocol.AirbyteProtocolVersionedMigratorFactory; import io.airbyte.config.EnvConfigs; import io.airbyte.persistence.job.models.JobRunConfig; import io.airbyte.workers.WorkerConfigs; @@ -46,7 +46,7 @@ class ContainerOrchestratorFactoryTest { AirbyteMessageSerDeProvider airbyteMessageSerDeProvider; @Inject - AirbyteMessageVersionedMigratorFactory airbyteMessageVersionedMigratorFactory; + AirbyteProtocolVersionedMigratorFactory airbyteProtocolVersionedMigratorFactory; @Inject JobRunConfig jobRunConfig; @@ -95,29 +95,29 @@ void jobOrchestrator() { final var repl = factory.jobOrchestrator( ReplicationLauncherWorker.REPLICATION, envConfigs, processFactory, featureFlags, workerConfigs, - airbyteMessageSerDeProvider, 
airbyteMessageVersionedMigratorFactory, jobRunConfig, sourceApi, destinationApi); + airbyteMessageSerDeProvider, airbyteProtocolVersionedMigratorFactory, jobRunConfig, sourceApi, destinationApi); assertEquals("Replication", repl.getOrchestratorName()); final var norm = factory.jobOrchestrator( NormalizationLauncherWorker.NORMALIZATION, envConfigs, processFactory, featureFlags, workerConfigs, - airbyteMessageSerDeProvider, airbyteMessageVersionedMigratorFactory, jobRunConfig, sourceApi, destinationApi); + airbyteMessageSerDeProvider, airbyteProtocolVersionedMigratorFactory, jobRunConfig, sourceApi, destinationApi); assertEquals("Normalization", norm.getOrchestratorName()); final var dbt = factory.jobOrchestrator( DbtLauncherWorker.DBT, envConfigs, processFactory, featureFlags, workerConfigs, - airbyteMessageSerDeProvider, airbyteMessageVersionedMigratorFactory, jobRunConfig, sourceApi, destinationApi); + airbyteMessageSerDeProvider, airbyteProtocolVersionedMigratorFactory, jobRunConfig, sourceApi, destinationApi); assertEquals("DBT Transformation", dbt.getOrchestratorName()); final var noop = factory.jobOrchestrator( AsyncOrchestratorPodProcess.NO_OP, envConfigs, processFactory, featureFlags, workerConfigs, - airbyteMessageSerDeProvider, airbyteMessageVersionedMigratorFactory, jobRunConfig, sourceApi, destinationApi); + airbyteMessageSerDeProvider, airbyteProtocolVersionedMigratorFactory, jobRunConfig, sourceApi, destinationApi); assertEquals("NO_OP", noop.getOrchestratorName()); var caught = false; try { factory.jobOrchestrator( "does not exist", envConfigs, processFactory, featureFlags, workerConfigs, - airbyteMessageSerDeProvider, airbyteMessageVersionedMigratorFactory, jobRunConfig, sourceApi, destinationApi); + airbyteMessageSerDeProvider, airbyteProtocolVersionedMigratorFactory, jobRunConfig, sourceApi, destinationApi); } catch (final Exception e) { caught = true; } diff --git 
a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/check/connection/CheckConnectionActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/check/connection/CheckConnectionActivityImpl.java index ba21f2af375a4..1b799568948e5 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/check/connection/CheckConnectionActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/check/connection/CheckConnectionActivityImpl.java @@ -14,7 +14,7 @@ import io.airbyte.api.client.AirbyteApiClient; import io.airbyte.commons.functional.CheckedSupplier; import io.airbyte.commons.protocol.AirbyteMessageSerDeProvider; -import io.airbyte.commons.protocol.AirbyteMessageVersionedMigratorFactory; +import io.airbyte.commons.protocol.AirbyteProtocolVersionedMigratorFactory; import io.airbyte.commons.temporal.CancellationHandler; import io.airbyte.commons.temporal.config.WorkerMode; import io.airbyte.config.Configs.WorkerEnvironment; @@ -44,6 +44,7 @@ import jakarta.inject.Singleton; import java.nio.file.Path; import java.util.Map; +import java.util.Optional; @Singleton @Requires(env = WorkerMode.CONTROL_PLANE) @@ -58,7 +59,7 @@ public class CheckConnectionActivityImpl implements CheckConnectionActivity { private final AirbyteApiClient airbyteApiClient; private final String airbyteVersion; private final AirbyteMessageSerDeProvider serDeProvider; - private final AirbyteMessageVersionedMigratorFactory migratorFactory; + private final AirbyteProtocolVersionedMigratorFactory migratorFactory; public CheckConnectionActivityImpl(@Named("checkWorkerConfigs") final WorkerConfigs workerConfigs, @Named("checkProcessFactory") final ProcessFactory processFactory, @@ -69,7 +70,7 @@ public CheckConnectionActivityImpl(@Named("checkWorkerConfigs") final WorkerConf final AirbyteApiClient airbyteApiClient, @Value("${airbyte.version}") final String airbyteVersion, final AirbyteMessageSerDeProvider serDeProvider, - final 
AirbyteMessageVersionedMigratorFactory migratorFactory) { + final AirbyteProtocolVersionedMigratorFactory migratorFactory) { this.workerConfigs = workerConfigs; this.processFactory = processFactory; this.workspaceRoot = workspaceRoot; @@ -133,7 +134,7 @@ private CheckedSupplier workerConfigs.getResourceRequirements(), launcherConfig.getIsCustomConnector()); final AirbyteStreamFactory streamFactory = launcherConfig.getProtocolVersion() != null - ? new VersionedAirbyteStreamFactory<>(serDeProvider, migratorFactory, launcherConfig.getProtocolVersion()) + ? new VersionedAirbyteStreamFactory<>(serDeProvider, migratorFactory, launcherConfig.getProtocolVersion(), Optional.empty()) : new DefaultAirbyteStreamFactory(); return new DefaultCheckConnectionWorker(integrationLauncher, streamFactory); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/discover/catalog/DiscoverCatalogActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/discover/catalog/DiscoverCatalogActivityImpl.java index fd205a92e9876..5b23c9c058b61 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/discover/catalog/DiscoverCatalogActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/discover/catalog/DiscoverCatalogActivityImpl.java @@ -14,7 +14,7 @@ import io.airbyte.api.client.AirbyteApiClient; import io.airbyte.commons.functional.CheckedSupplier; import io.airbyte.commons.protocol.AirbyteMessageSerDeProvider; -import io.airbyte.commons.protocol.AirbyteMessageVersionedMigratorFactory; +import io.airbyte.commons.protocol.AirbyteProtocolVersionedMigratorFactory; import io.airbyte.commons.temporal.CancellationHandler; import io.airbyte.commons.temporal.config.WorkerMode; import io.airbyte.config.Configs.WorkerEnvironment; @@ -43,6 +43,7 @@ import jakarta.inject.Singleton; import java.nio.file.Path; import java.util.Map; +import java.util.Optional; import lombok.extern.slf4j.Slf4j; @Singleton @@ -61,7 +62,7 @@ public class 
DiscoverCatalogActivityImpl implements DiscoverCatalogActivity { private final ConfigRepository configRepository; private final AirbyteMessageSerDeProvider serDeProvider; - private final AirbyteMessageVersionedMigratorFactory migratorFactory; + private final AirbyteProtocolVersionedMigratorFactory migratorFactory; public DiscoverCatalogActivityImpl(@Named("discoverWorkerConfigs") final WorkerConfigs workerConfigs, @Named("discoverProcessFactory") final ProcessFactory processFactory, @@ -73,7 +74,7 @@ public DiscoverCatalogActivityImpl(@Named("discoverWorkerConfigs") final WorkerC final AirbyteApiClient airbyteApiClient, @Value("${airbyte.version}") final String airbyteVersion, final AirbyteMessageSerDeProvider serDeProvider, - final AirbyteMessageVersionedMigratorFactory migratorFactory) { + final AirbyteProtocolVersionedMigratorFactory migratorFactory) { this.configRepository = configRepository; this.workerConfigs = workerConfigs; this.processFactory = processFactory; @@ -126,7 +127,7 @@ private CheckedSupplier new AirbyteIntegrationLauncher(launcherConfig.getJobId(), launcherConfig.getAttemptId().intValue(), launcherConfig.getDockerImage(), processFactory, workerConfigs.getResourceRequirements(), launcherConfig.getIsCustomConnector()); final AirbyteStreamFactory streamFactory = - new VersionedAirbyteStreamFactory<>(serDeProvider, migratorFactory, launcherConfig.getProtocolVersion()); + new VersionedAirbyteStreamFactory<>(serDeProvider, migratorFactory, launcherConfig.getProtocolVersion(), Optional.empty()); return new DefaultDiscoverCatalogWorker(configRepository, integrationLauncher, streamFactory); }; } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/spec/SpecActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/spec/SpecActivityImpl.java index 280196052ddb0..d07cc94c9e394 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/spec/SpecActivityImpl.java +++ 
b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/spec/SpecActivityImpl.java @@ -13,7 +13,7 @@ import io.airbyte.api.client.AirbyteApiClient; import io.airbyte.commons.functional.CheckedSupplier; import io.airbyte.commons.protocol.AirbyteMessageSerDeProvider; -import io.airbyte.commons.protocol.AirbyteMessageVersionedMigratorFactory; +import io.airbyte.commons.protocol.AirbyteProtocolVersionedMigratorFactory; import io.airbyte.commons.temporal.CancellationHandler; import io.airbyte.commons.temporal.config.WorkerMode; import io.airbyte.commons.version.Version; @@ -41,6 +41,7 @@ import jakarta.inject.Singleton; import java.nio.file.Path; import java.util.Map; +import java.util.Optional; import java.util.function.Supplier; @Singleton @@ -55,7 +56,7 @@ public class SpecActivityImpl implements SpecActivity { private final AirbyteApiClient airbyteApiClient; private final String airbyteVersion; private final AirbyteMessageSerDeProvider serDeProvider; - private final AirbyteMessageVersionedMigratorFactory migratorFactory; + private final AirbyteProtocolVersionedMigratorFactory migratorFactory; public SpecActivityImpl(@Named("specWorkerConfigs") final WorkerConfigs workerConfigs, @Named("specProcessFactory") final ProcessFactory processFactory, @@ -65,7 +66,7 @@ public SpecActivityImpl(@Named("specWorkerConfigs") final WorkerConfigs workerCo final AirbyteApiClient airbyteApiClient, @Value("${airbyte.version}") final String airbyteVersion, final AirbyteMessageSerDeProvider serDeProvider, - final AirbyteMessageVersionedMigratorFactory migratorFactory) { + final AirbyteProtocolVersionedMigratorFactory migratorFactory) { this.workerConfigs = workerConfigs; this.processFactory = processFactory; this.workspaceRoot = workspaceRoot; @@ -123,7 +124,7 @@ private AirbyteStreamFactory getStreamFactory(final IntegrationLauncherConfig la final Version protocolVersion = launcherConfig.getProtocolVersion() != null ? 
launcherConfig.getProtocolVersion() : migratorFactory.getMostRecentVersion(); // Try to detect version from the stream - return new VersionedAirbyteStreamFactory<>(serDeProvider, migratorFactory, protocolVersion).withDetectVersion(true); + return new VersionedAirbyteStreamFactory<>(serDeProvider, migratorFactory, protocolVersion, Optional.empty()).withDetectVersion(true); } } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationActivityImpl.java index 820110a3eeed1..fb309f10abaf7 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/ReplicationActivityImpl.java @@ -23,7 +23,7 @@ import io.airbyte.commons.functional.CheckedSupplier; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.protocol.AirbyteMessageSerDeProvider; -import io.airbyte.commons.protocol.AirbyteMessageVersionedMigratorFactory; +import io.airbyte.commons.protocol.AirbyteProtocolVersionedMigratorFactory; import io.airbyte.commons.temporal.CancellationHandler; import io.airbyte.commons.temporal.TemporalUtils; import io.airbyte.config.AirbyteConfigValidator; @@ -101,7 +101,7 @@ public class ReplicationActivityImpl implements ReplicationActivity { private final TemporalUtils temporalUtils; private final AirbyteApiClient airbyteApiClient; private final AirbyteMessageSerDeProvider serDeProvider; - private final AirbyteMessageVersionedMigratorFactory migratorFactory; + private final AirbyteProtocolVersionedMigratorFactory migratorFactory; private final WorkerConfigs workerConfigs; public ReplicationActivityImpl(@Named("containerOrchestratorConfig") final Optional containerOrchestratorConfig, @@ -117,7 +117,7 @@ public ReplicationActivityImpl(@Named("containerOrchestratorConfig") final Optio final TemporalUtils temporalUtils, final AirbyteApiClient 
airbyteApiClient, final AirbyteMessageSerDeProvider serDeProvider, - final AirbyteMessageVersionedMigratorFactory migratorFactory, + final AirbyteProtocolVersionedMigratorFactory migratorFactory, @Named("replicationWorkerConfigs") final WorkerConfigs workerConfigs) { this.containerOrchestratorConfig = containerOrchestratorConfig; this.processFactory = processFactory; @@ -290,7 +290,9 @@ private CheckedSupplier, Exception> ? new EmptyAirbyteSource(featureFlags.useStreamCapableState()) : new DefaultAirbyteSource(sourceLauncher, new VersionedAirbyteStreamFactory<>(serDeProvider, migratorFactory, sourceLauncherConfig.getProtocolVersion(), - DefaultAirbyteSource.CONTAINER_LOG_MDC_BUILDER)); + Optional.of(syncInput.getCatalog()), + DefaultAirbyteSource.CONTAINER_LOG_MDC_BUILDER), + migratorFactory.getProtocolSerializer(sourceLauncherConfig.getProtocolVersion())); MetricClientFactory.initialize(MetricEmittingApps.WORKER); final MetricClient metricClient = MetricClientFactory.getMetricClient(); final WorkerMetricReporter metricReporter = new WorkerMetricReporter(metricClient, sourceLauncherConfig.getDockerImage()); @@ -302,8 +304,11 @@ private CheckedSupplier, Exception> new NamespacingMapper(syncInput.getNamespaceDefinition(), syncInput.getNamespaceFormat(), syncInput.getPrefix()), new DefaultAirbyteDestination(destinationLauncher, new VersionedAirbyteStreamFactory<>(serDeProvider, migratorFactory, destinationLauncherConfig.getProtocolVersion(), + Optional.of(syncInput.getCatalog()), DefaultAirbyteDestination.CONTAINER_LOG_MDC_BUILDER), - new VersionedAirbyteMessageBufferedWriterFactory(serDeProvider, migratorFactory, destinationLauncherConfig.getProtocolVersion())), + new VersionedAirbyteMessageBufferedWriterFactory(serDeProvider, migratorFactory, destinationLauncherConfig.getProtocolVersion(), + Optional.of(syncInput.getCatalog())), + migratorFactory.getProtocolSerializer(destinationLauncherConfig.getProtocolVersion())), new AirbyteMessageTracker(), new 
RecordSchemaValidator(WorkerUtils.mapStreamNamesToSchemas(syncInput)), metricReporter, From 5e819a41b5109e478e7b2d789d2d9f6e08a64c12 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Wed, 11 Jan 2023 07:33:43 -0800 Subject: [PATCH 07/11] Data types update: Implement protocol message migrations (#19240) * Extract MigrationContainer from AirbyteMessageMigrator * Add ConfiguredAirbyteCatalogMigrations * Add ConfiguredAirbyteCatalog to AirbyteMessageMigrations * Enable ConfiguredAirbyteCatalog migration * set up scaffolding * [wip] more scaffolding, basic unit test * minimal green code * [wip] add failing test for other primitive types * correct version number * handle basic primitive type decls * add implicit cases * add recursive schema * formatting * comment * support not * fix indentation * handle all nested schema cases * handle boolean schemas * verify empty schema handling * cleanup * extract map * code organization * extract method * reformat * [wip] more tests, minor fix type array handling * corrected test * cleanup * reformat * switch to v1 * add support for multityped fields * missed test case * nested test class * basic record upgrade * implement record upgrades * slight refactor * comments+clarificationso * extract constants * (partly) correct model classes * add de/ser * formatting * extract constants * fix json reference * update docs * switch to v1 models * fix compile+test * add base64 handling * use vnull * Data types update: Implement protocol message downgrade path (#19909) * rough skeleton for passing catalog into migration * basic test * more scaffolding * basic implementation * add primitives test * add in other tests (nested fields currently failing) * add formats * impleent oneOf handling * formatting * oneOf handling * better tests * comments + organization * progress * basic test case * downgrade objects, ish * basic array implementation * handle numeric failure * test for new type * handle array items * empty schema handling * first pass at 
oneof handling * add more tests+handling * more tests * comments * add empty oneof test case * format + reorganize * more reorganize * fix name * also downgrade binary data * only import vnull * move migrations into v1 package * extract schema mutation code * comment * extract schema migration to new class * extract record downgrade logic for future use * format * fix build after rebase * rename private method for consistency * also implement configuredcatalog migrations >.> * quick and dirty tests * slight cleanup * fix tests * pmd * pmd test * null check on message objects * maybe fix acceptance tests? * fix name * extract constants * more fixes * tmp * meh * fix cdc acc tests * revert to master source-postgres * remove log messages * revert other misc hacks * integers are valid cursors * remove unrelated change * fix build * fix build more? * [MUST REVERT] use dev normalization * capture kube logs * also here? * no debug logs? * delete dup from merging * add final everywhere * revert test changes Co-authored-by: Jimmy Ma --- .github/workflows/gradle.yml | 24 +- airbyte-commons-protocol/build.gradle | 1 + .../migrations/AirbyteMessageMigrationV1.java | 43 - .../ConfiguredAirbyteCatalogMigrationV1.java | 37 - .../migrations/util/RecordMigrations.java | 271 +++ .../migrations/util/SchemaMigrations.java | 131 ++ .../v1/AirbyteMessageMigrationV1.java | 178 ++ .../ConfiguredAirbyteCatalogMigrationV1.java | 54 + .../migrations/v1/SchemaMigrationV1.java | 306 +++ .../v1/AirbyteMessageMigrationV1Test.java | 1633 +++++++++++++++++ ...nfiguredAirbyteCatalogMigrationV1Test.java | 108 ++ .../serde/AirbyteMessageV1SerDeTest.java | 37 + .../src/test/resources/WellKnownTypes.json | 65 + .../VersionedAirbyteStreamFactoryTest.java | 4 +- .../logs-without-spec-message.jsonl | 4 +- .../relationaldb/StateDecoratingIterator.java | 2 +- .../validation/json/JsonSchemaValidator.java | 14 +- .../models/JsonSchemaPrimitiveUtil.java | 9 +- .../models/JsonSchemaReferenceTypes.java | 74 +- 
.../utils/AirbyteAcceptanceTestHarness.java | 13 + .../test/acceptance/BasicAcceptanceTests.java | 26 +- .../test/acceptance/CdcAcceptanceTests.java | 14 +- .../supported-data-types.md | 34 +- tools/bin/acceptance_test_kube.sh | 4 +- tools/bin/acceptance_test_kube_helm.sh | 4 +- 25 files changed, 2934 insertions(+), 156 deletions(-) delete mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV1.java delete mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/ConfiguredAirbyteCatalogMigrationV1.java create mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/util/RecordMigrations.java create mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/util/SchemaMigrations.java create mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/AirbyteMessageMigrationV1.java create mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/ConfiguredAirbyteCatalogMigrationV1.java create mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/SchemaMigrationV1.java create mode 100644 airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/migrations/v1/AirbyteMessageMigrationV1Test.java create mode 100644 airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/migrations/v1/ConfiguredAirbyteCatalogMigrationV1Test.java create mode 100644 airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1SerDeTest.java create mode 100644 airbyte-commons-protocol/src/test/resources/WellKnownTypes.json diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index 25abc23e4d60c..2fcf214fb2c9e 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -12,7 +12,7 @@ on: description: "Enable or disable tmate session for debug during 
helm ac tests" type: choice default: 'false' - options: + options: - 'true' - 'false' required: false @@ -305,7 +305,7 @@ jobs: if: always() run: | gcs_bucket_name="dev-ab-ci-run-results" - filename=$(echo "${{ fromJSON( steps.connectors-test-results.outputs.json ).check_url }}" | sed 's@.*/@@') + filename=$(echo "${{ fromJSON( steps.connectors-test-results.outputs.json ).check_url }}" | sed 's@.*/@@') echo "$filename" gsutil -h "Cache-Control:public" cp connectors_base_results.jsonl "gs://$gcs_bucket_name/oss/$filename.jsonl" @@ -699,7 +699,7 @@ jobs: if: always() run: | gcs_bucket_name="dev-ab-ci-run-results" - filename=$(echo "${{ fromJSON( steps.platform-results.outputs.json ).check_url }}" | sed 's@.*/@@') + filename=$(echo "${{ fromJSON( steps.platform-results.outputs.json ).check_url }}" | sed 's@.*/@@') echo "$filename" gsutil -h "Cache-Control:public" cp platform_results.jsonl "gs://$gcs_bucket_name/oss/$filename.jsonl" @@ -756,7 +756,7 @@ jobs: github-token: ${{ env.PAT }} label: ${{ needs.start-platform-build-runner.outputs.label }} ec2-instance-id: ${{ needs.start-platform-build-runner.outputs.ec2-instance-id }} - + ## Kube Acceptance Tests # Docker acceptance tests run as part of the build job. # In case of self-hosted EC2 errors, remove this block. 
@@ -919,7 +919,7 @@ jobs: if: always() run: | gcs_bucket_name="dev-ab-ci-run-results" - filename=$(echo "${{ fromJSON( steps.kube-results.outputs.json ).check_url }}" | sed 's@.*/@@') + filename=$(echo "${{ fromJSON( steps.kube-results.outputs.json ).check_url }}" | sed 's@.*/@@') echo "$filename" gsutil -h "Cache-Control:public" cp kube_results.jsonl "gs://$gcs_bucket_name/oss/$filename.jsonl" @@ -1036,7 +1036,7 @@ jobs: - name: Fix EC-2 Runner run: | mkdir -p /actions-runner/_work/airbyte/airbyte && mkdir -p /actions-runner/_work/airbyte/airbyte/.kube - + - name: Checkout Airbyte uses: actions/checkout@v2 with: @@ -1059,7 +1059,7 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: '3.9' + python-version: '3.9' - uses: actions/setup-java@v1 with: @@ -1087,11 +1087,11 @@ jobs: sudo apt-get -y install tmate attempt_limit: 3 attempt_delay: 2000 # in ms - + - name: Start tmate session in background if: inputs.debug_mode == 'true' shell: bash - run: | + run: | tmate -S /tmp/tmate.sock new-session -d # Launch tmate in a headless mode tmate -S /tmp/tmate.sock wait tmate-ready # Blocks until the SSH connection is established tmate -S /tmp/tmate.sock display -p '#{tmate_ssh}' # Prints the SSH connection string @@ -1163,7 +1163,7 @@ jobs: with: name: Kubernetes Logs path: /tmp/kubernetes_logs/* - + - name: Upload test results to BuildPulse for flaky test detection if: "!cancelled()" # Run this step even when the tests fail. Skip if the workflow is cancelled. uses: Workshop64/buildpulse-action@main @@ -1173,13 +1173,13 @@ jobs: path: "/actions-runner/_work/airbyte/airbyte/*" key: ${{ secrets.BUILDPULSE_ACCESS_KEY_ID }} secret: ${{ secrets.BUILDPULSE_SECRET_ACCESS_KEY }} - + - name: "Display logs of k3s" if: failure() shell: bash run: | journalctl -xeu k3s.service - + # # In case of self-hosted EC2 errors, remove this block. 
stop-helm-acceptance-test-runner: diff --git a/airbyte-commons-protocol/build.gradle b/airbyte-commons-protocol/build.gradle index 502c714ffd8e3..cd5f7132b5fb3 100644 --- a/airbyte-commons-protocol/build.gradle +++ b/airbyte-commons-protocol/build.gradle @@ -6,6 +6,7 @@ dependencies { testImplementation libs.bundles.micronaut.test implementation project(':airbyte-protocol:protocol-models') + implementation project(':airbyte-json-validation') } Task publishArtifactsTask = getPublishArtifactsTask("$rootProject.ext.version", project) diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV1.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV1.java deleted file mode 100644 index b48a373f93297..0000000000000 --- a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/AirbyteMessageMigrationV1.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.commons.protocol.migrations; - -import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.version.AirbyteProtocolVersion; -import io.airbyte.commons.version.Version; -import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import io.airbyte.protocol.models.v0.AirbyteMessage; -import jakarta.inject.Singleton; -import java.util.Optional; - -/** - * Placeholder AirbyteMessage Migration from v0 to v1 - */ -@Singleton -public class AirbyteMessageMigrationV1 implements AirbyteMessageMigration { - - @Override - public AirbyteMessage downgrade(final io.airbyte.protocol.models.AirbyteMessage message, - final Optional configuredAirbyteCatalog) { - return Jsons.object(Jsons.jsonNode(message), AirbyteMessage.class); - } - - @Override - public io.airbyte.protocol.models.AirbyteMessage upgrade(final AirbyteMessage message, - final Optional configuredAirbyteCatalog) { - return Jsons.object(Jsons.jsonNode(message), io.airbyte.protocol.models.AirbyteMessage.class); - } - - @Override - public Version getPreviousVersion() { - return AirbyteProtocolVersion.V0; - } - - @Override - public Version getCurrentVersion() { - return AirbyteProtocolVersion.V1; - } - -} diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/ConfiguredAirbyteCatalogMigrationV1.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/ConfiguredAirbyteCatalogMigrationV1.java deleted file mode 100644 index c2293d740da34..0000000000000 --- a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/ConfiguredAirbyteCatalogMigrationV1.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.commons.protocol.migrations; - -import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.version.AirbyteProtocolVersion; -import io.airbyte.commons.version.Version; -import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; -import jakarta.inject.Singleton; - -@Singleton -public class ConfiguredAirbyteCatalogMigrationV1 - implements ConfiguredAirbyteCatalogMigration { - - @Override - public ConfiguredAirbyteCatalog downgrade(io.airbyte.protocol.models.ConfiguredAirbyteCatalog message) { - return Jsons.object(Jsons.jsonNode(message), ConfiguredAirbyteCatalog.class); - } - - @Override - public io.airbyte.protocol.models.ConfiguredAirbyteCatalog upgrade(ConfiguredAirbyteCatalog message) { - return Jsons.object(Jsons.jsonNode(message), io.airbyte.protocol.models.ConfiguredAirbyteCatalog.class); - } - - @Override - public Version getPreviousVersion() { - return AirbyteProtocolVersion.V0; - } - - @Override - public Version getCurrentVersion() { - return AirbyteProtocolVersion.V1; - } - -} diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/util/RecordMigrations.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/util/RecordMigrations.java new file mode 100644 index 0000000000000..81b5990dc73e5 --- /dev/null +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/util/RecordMigrations.java @@ -0,0 +1,271 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.commons.protocol.migrations.util; + +import static io.airbyte.protocol.models.JsonSchemaReferenceTypes.ARRAY_TYPE; +import static io.airbyte.protocol.models.JsonSchemaReferenceTypes.ITEMS_KEY; +import static io.airbyte.protocol.models.JsonSchemaReferenceTypes.OBJECT_TYPE; +import static io.airbyte.protocol.models.JsonSchemaReferenceTypes.ONEOF_KEY; +import static io.airbyte.protocol.models.JsonSchemaReferenceTypes.PROPERTIES_KEY; +import static io.airbyte.protocol.models.JsonSchemaReferenceTypes.REF_KEY; +import static io.airbyte.protocol.models.JsonSchemaReferenceTypes.TYPE_KEY; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.airbyte.commons.json.Jsons; +import io.airbyte.validation.json.JsonSchemaValidator; +import java.util.Iterator; +import java.util.Map.Entry; +import java.util.function.BiFunction; +import java.util.function.Function; + +public class RecordMigrations { + + /** + * Quick and dirty tuple. Used internally by + * {@link #mutateDataNode(JsonSchemaValidator, Function, Transformer, JsonNode, JsonNode)}; callers + * probably only actually need the node. + * + * matchedSchema is useful for mutating using a oneOf schema, where we need to recognize the correct + * subschema. + * + * @param node Our attempt at mutating the node, under the given schema + * @param matchedSchema Whether the original node actually matched the schema + */ + public record MigratedNode(JsonNode node, boolean matchedSchema) {} + + /** + * Extend BiFunction so that we can have named parameters. + */ + @FunctionalInterface + public interface Transformer extends BiFunction { + + @Override + MigratedNode apply(JsonNode schema, JsonNode data); + + } + + /** + * Works on a best-effort basis. If the schema doesn't match the data, we'll do our best to mutate + * anything that we can definitively say matches the criteria. 
Should _not_ throw an exception if + * bad things happen (e.g. we try to parse a non-numerical string as a number). + * + * @param schemaMatcher Accepts a JsonNode schema and returns whether its corresponding entry in the + * data should be mutated. Doesn't need to handle oneOf cases, i.e. should only care about + * type/$ref. + * @param transformer Performs the modification on the given data node. Should not throw exceptions. + */ + public static MigratedNode mutateDataNode( + final JsonSchemaValidator validator, + final Function schemaMatcher, + final Transformer transformer, + final JsonNode data, + final JsonNode schema) { + // If this is a oneOf node, then we need to handle each oneOf case. + if (!schema.hasNonNull(REF_KEY) && !schema.hasNonNull(TYPE_KEY) && schema.hasNonNull(ONEOF_KEY)) { + return mutateOneOfNode(validator, schemaMatcher, transformer, data, schema); + } + + // If we should mutate the data, then mutate it appropriately + if (schemaMatcher.apply(schema)) { + return transformer.apply(schema, data); + } + + // Otherwise, we need to recurse into non-primitive nodes. + if (data.isObject()) { + return mutateObjectNode(validator, schemaMatcher, transformer, data, schema); + } else if (data.isArray()) { + return mutateArrayNode(validator, schemaMatcher, transformer, data, schema); + } else { + // There's nothing to do in the case of a primitive node. + // So we just check whether the schema is correct and return the node as-is. + return new MigratedNode(data, validator.test(schema, data)); + } + } + + /** + * Attempt to mutate using each oneOf option in sequence. Returns the result from mutating using the + * first subschema that matches the data, or if none match, then the result of using the first + * subschema. 
+ */ + private static MigratedNode mutateOneOfNode( + final JsonSchemaValidator validator, + final Function schemaMatcher, + final Transformer transformer, + final JsonNode data, + final JsonNode schema) { + final JsonNode schemaOptions = schema.get(ONEOF_KEY); + if (schemaOptions.size() == 0) { + // If the oneOf has no options, then don't do anything interesting. + return new MigratedNode(data, validator.test(schema, data)); + } + + // Attempt to mutate the node against each oneOf schema. + // Return the first schema that matches the data, or the first schema if none matched successfully. + MigratedNode migratedNode = null; + for (final JsonNode maybeSchema : schemaOptions) { + final MigratedNode maybeMigratedNode = mutateDataNode(validator, schemaMatcher, transformer, data, maybeSchema); + if (maybeMigratedNode.matchedSchema()) { + // If we've found a matching schema, then return immediately + return maybeMigratedNode; + } else if (migratedNode == null) { + // Otherwise - if this is the first subschema, then just take it + migratedNode = maybeMigratedNode; + } + } + // None of the schemas matched, so just return whatever we found first + return migratedNode; + } + + /** + * If data is an object, then we need to recursively mutate all of its fields. + */ + private static MigratedNode mutateObjectNode( + final JsonSchemaValidator validator, + final Function schemaMatcher, + final Transformer transformer, + final JsonNode data, + final JsonNode schema) { + boolean isObjectSchema; + // First, check whether the schema is supposed to be an object at all. + if (schema.hasNonNull(REF_KEY)) { + // If the schema uses a reference type, then it's not an object schema. 
+ isObjectSchema = false; + } else if (schema.hasNonNull(TYPE_KEY)) { + // If the schema declares {type: object} or {type: [..., object, ...]} + // Then this is an object schema + final JsonNode typeNode = schema.get(TYPE_KEY); + if (typeNode.isArray()) { + isObjectSchema = false; + for (final JsonNode typeItem : typeNode) { + if (OBJECT_TYPE.equals(typeItem.asText())) { + isObjectSchema = true; + } + } + } else { + isObjectSchema = OBJECT_TYPE.equals(typeNode.asText()); + } + } else { + // If the schema doesn't declare a type at all (which is bad practice, but let's handle it anyway) + // Then check for a properties entry, and assume that this is an object if it's present + isObjectSchema = schema.hasNonNull(PROPERTIES_KEY); + } + + if (!isObjectSchema) { + // If it's not supposed to be an object, then we can't do anything here. + // Return the data without modification. + return new MigratedNode(data, false); + } else { + // If the schema _is_ for an object, then recurse into each field + final ObjectNode mutatedData = (ObjectNode) Jsons.emptyObject(); + final JsonNode propertiesNode = schema.get(PROPERTIES_KEY); + + final Iterator> dataFields = data.fields(); + boolean matchedSchema = true; + while (dataFields.hasNext()) { + final Entry field = dataFields.next(); + final String key = field.getKey(); + final JsonNode value = field.getValue(); + if (propertiesNode != null && propertiesNode.hasNonNull(key)) { + // If we have a schema for this property, mutate the value + final JsonNode subschema = propertiesNode.get(key); + final MigratedNode migratedNode = mutateDataNode(validator, schemaMatcher, transformer, value, subschema); + mutatedData.set(key, migratedNode.node); + if (!migratedNode.matchedSchema) { + matchedSchema = false; + } + } else { + // Else it's an additional property - we _could_ check additionalProperties, + // but that's annoying. We don't actually respect that in destinations/normalization anyway. 
+ mutatedData.set(key, value); + } + } + + return new MigratedNode(mutatedData, matchedSchema); + } + } + + /** + * Much like objects, arrays must be recursively mutated. + */ + private static MigratedNode mutateArrayNode( + final JsonSchemaValidator validator, + final Function schemaMatcher, + final Transformer transformer, + final JsonNode data, + final JsonNode schema) { + // Similar to objects, we first check whether this is even supposed to be an array. + boolean isArraySchema; + if (schema.hasNonNull(REF_KEY)) { + // If the schema uses a reference type, then it's not an array schema. + isArraySchema = false; + } else if (schema.hasNonNull(TYPE_KEY)) { + // If the schema declares {type: array} or {type: [..., array, ...]} + // Then this is an array schema + final JsonNode typeNode = schema.get(TYPE_KEY); + if (typeNode.isArray()) { + isArraySchema = false; + for (final JsonNode typeItem : typeNode) { + if (ARRAY_TYPE.equals(typeItem.asText())) { + isArraySchema = true; + } + } + } else { + isArraySchema = ARRAY_TYPE.equals(typeNode.asText()); + } + } else { + // If the schema doesn't declare a type at all (which is bad practice, but let's handle it anyway) + // Then check for an items entry, and assume that this is an array if it's present + isArraySchema = schema.hasNonNull(ITEMS_KEY); + } + + if (!isArraySchema) { + return new MigratedNode(data, false); + } else { + final ArrayNode mutatedItems = Jsons.arrayNode(); + final JsonNode itemsNode = schema.get(ITEMS_KEY); + if (itemsNode == null) { + // We _could_ check additionalItems, but much like the additionalProperties comment for objects: + // it's a lot of work for no payoff + return new MigratedNode(data, true); + } else if (itemsNode.isArray()) { + // In the case of {items: [schema1, schema2, ...]} + // We need to check schema1 against the first element of the array, + // schema2 against the second element, etc. 
+ boolean allSchemasMatched = true; + for (int i = 0; i < data.size(); i++) { + final JsonNode element = data.get(i); + if (itemsNode.size() > i) { + // If we have a schema for this element, then try mutating the element + final MigratedNode mutatedElement = mutateDataNode(validator, schemaMatcher, transformer, element, itemsNode.get(i)); + if (!mutatedElement.matchedSchema()) { + allSchemasMatched = false; + } + mutatedItems.add(mutatedElement.node()); + } + } + // If there were more elements in `data` than there were schemas in `itemsNode`, + // then just blindly add the rest of those elements. + for (int i = itemsNode.size(); i < data.size(); i++) { + mutatedItems.add(data.get(i)); + } + return new MigratedNode(mutatedItems, allSchemasMatched); + } else { + // IN the case of {items: schema}, we just check every array element against that schema. + boolean matchedSchema = true; + for (final JsonNode item : data) { + final MigratedNode migratedNode = mutateDataNode(validator, schemaMatcher, transformer, item, itemsNode); + mutatedItems.add(migratedNode.node); + if (!migratedNode.matchedSchema) { + matchedSchema = false; + } + } + return new MigratedNode(mutatedItems, matchedSchema); + } + } + } + +} diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/util/SchemaMigrations.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/util/SchemaMigrations.java new file mode 100644 index 0000000000000..c90aae6e7e6e3 --- /dev/null +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/util/SchemaMigrations.java @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.commons.protocol.migrations.util; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map.Entry; +import java.util.function.Consumer; +import java.util.function.Function; + +/** + * Utility class for recursively modifying JsonSchemas. Useful for up/downgrading AirbyteCatalog + * objects. + * + * See {@link io.airbyte.commons.protocol.migrations.v1.SchemaMigrationV1} for example usage. + */ +public class SchemaMigrations { + + /** + * Generic utility method that recurses through all type declarations in the schema. For each type + * declaration that are accepted by matcher, mutate them using transformer. For all other type + * declarations, recurse into their subschemas (if any). + *

+ * Note that this modifies the schema in-place. Callers who need a copy of the old schema should + * save schema.deepCopy() before calling this method. + * + * @param schema The JsonSchema node to walk down + * @param matcher A function which returns true on any schema node that needs to be transformed + * @param transformer A function which mutates a schema node + */ + public static void mutateSchemas(final Function matcher, final Consumer transformer, final JsonNode schema) { + if (schema.isBoolean()) { + // We never want to modify a schema of `true` or `false` (e.g. additionalProperties: true) + // so just return immediately + return; + } + if (matcher.apply(schema)) { + // Base case: If this schema should be mutated, then we need to mutate it + transformer.accept(schema); + } else { + // Otherwise, we need to find all the subschemas and mutate them. + // technically, it might be more correct to do something like: + // if schema["type"] == "array": find subschemas for items, additionalItems, contains + // else if schema["type"] == "object": find subschemas for properties, patternProperties, + // additionalProperties + // else if oneof, allof, etc + // but that sounds really verbose for no real benefit + final List subschemas = new ArrayList<>(); + + // array schemas + findSubschemas(subschemas, schema, "items"); + findSubschemas(subschemas, schema, "additionalItems"); + findSubschemas(subschemas, schema, "contains"); + + // object schemas + if (schema.hasNonNull("properties")) { + final ObjectNode propertiesNode = (ObjectNode) schema.get("properties"); + final Iterator> propertiesIterator = propertiesNode.fields(); + while (propertiesIterator.hasNext()) { + final Entry property = propertiesIterator.next(); + subschemas.add(property.getValue()); + } + } + if (schema.hasNonNull("patternProperties")) { + final ObjectNode propertiesNode = (ObjectNode) schema.get("patternProperties"); + final Iterator> propertiesIterator = propertiesNode.fields(); + while 
(propertiesIterator.hasNext()) { + final Entry property = propertiesIterator.next(); + subschemas.add(property.getValue()); + } + } + findSubschemas(subschemas, schema, "additionalProperties"); + + // combining restrictions - destinations have limited support for these, but we should handle the + // schemas correctly anyway + findSubschemas(subschemas, schema, "allOf"); + findSubschemas(subschemas, schema, "oneOf"); + findSubschemas(subschemas, schema, "anyOf"); + findSubschemas(subschemas, schema, "not"); + + // recurse into each subschema + for (final JsonNode subschema : subschemas) { + mutateSchemas(matcher, transformer, subschema); + } + } + } + + /** + * If schema contains key, then grab the subschema(s) at schema[key] and add them to the subschemas + * list. + *

+ * For example: + *

    + *
  • schema = {"items": [{"type": "string"}]} + *

    + * key = "items" + *

    + * -> add {"type": "string"} to subschemas

  • + *
  • schema = {"items": {"type": "string"}} + *

    + * key = "items" + *

    + * -> add {"type": "string"} to subschemas

  • + *
  • schema = {"additionalProperties": true} + *

    + * key = "additionalProperties" + *

    + * -> add nothing to subschemas + *

    + * (technically `true` is a valid JsonSchema, but we don't want to modify it)

  • + *
+ */ + public static void findSubschemas(final List subschemas, final JsonNode schema, final String key) { + if (schema.hasNonNull(key)) { + final JsonNode subschemaNode = schema.get(key); + if (subschemaNode.isArray()) { + for (final JsonNode subschema : subschemaNode) { + subschemas.add(subschema); + } + } else if (subschemaNode.isObject()) { + subschemas.add(subschemaNode); + } + } + } + +} diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/AirbyteMessageMigrationV1.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/AirbyteMessageMigrationV1.java new file mode 100644 index 0000000000000..5150e2ac0e48f --- /dev/null +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/AirbyteMessageMigrationV1.java @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.commons.protocol.migrations.v1; + +import static io.airbyte.protocol.models.JsonSchemaReferenceTypes.REF_KEY; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.fasterxml.jackson.databind.node.TextNode; +import com.google.common.annotations.VisibleForTesting; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.protocol.migrations.AirbyteMessageMigration; +import io.airbyte.commons.protocol.migrations.util.RecordMigrations; +import io.airbyte.commons.protocol.migrations.util.RecordMigrations.MigratedNode; +import io.airbyte.commons.version.AirbyteProtocolVersion; +import io.airbyte.commons.version.Version; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteMessage.Type; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import 
io.airbyte.protocol.models.JsonSchemaReferenceTypes; +import io.airbyte.validation.json.JsonSchemaValidator; +import jakarta.inject.Singleton; +import java.util.Iterator; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.Optional; + +@Singleton +public class AirbyteMessageMigrationV1 implements AirbyteMessageMigration { + + private final JsonSchemaValidator validator; + + public AirbyteMessageMigrationV1() { + this(new JsonSchemaValidator()); + } + + @VisibleForTesting + public AirbyteMessageMigrationV1(final JsonSchemaValidator validator) { + this.validator = validator; + } + + @Override + public io.airbyte.protocol.models.v0.AirbyteMessage downgrade(final AirbyteMessage oldMessage, + final Optional configuredAirbyteCatalog) { + final io.airbyte.protocol.models.v0.AirbyteMessage newMessage = Jsons.object( + Jsons.jsonNode(oldMessage), + io.airbyte.protocol.models.v0.AirbyteMessage.class); + if (oldMessage.getType() == Type.CATALOG && oldMessage.getCatalog() != null) { + for (final io.airbyte.protocol.models.v0.AirbyteStream stream : newMessage.getCatalog().getStreams()) { + final JsonNode schema = stream.getJsonSchema(); + SchemaMigrationV1.downgradeSchema(schema); + } + } else if (oldMessage.getType() == Type.RECORD && oldMessage.getRecord() != null) { + if (configuredAirbyteCatalog.isPresent()) { + final ConfiguredAirbyteCatalog catalog = configuredAirbyteCatalog.get(); + final io.airbyte.protocol.models.v0.AirbyteRecordMessage record = newMessage.getRecord(); + final Optional maybeStream = catalog.getStreams().stream() + .filter(stream -> Objects.equals(stream.getStream().getName(), record.getStream()) + && Objects.equals(stream.getStream().getNamespace(), record.getNamespace())) + .findFirst(); + // If this record doesn't belong to any configured stream, then there's no point downgrading it + // So only do the downgrade if we can find its stream + if (maybeStream.isPresent()) { + final JsonNode schema = 
maybeStream.get().getStream().getJsonSchema(); + final JsonNode oldData = record.getData(); + final MigratedNode downgradedNode = downgradeRecord(oldData, schema); + record.setData(downgradedNode.node()); + } + } + } + return newMessage; + } + + @Override + public AirbyteMessage upgrade(final io.airbyte.protocol.models.v0.AirbyteMessage oldMessage, + final Optional configuredAirbyteCatalog) { + // We're not introducing any changes to the structure of the record/catalog + // so just clone a new message object, which we can edit in-place + final AirbyteMessage newMessage = Jsons.object( + Jsons.jsonNode(oldMessage), + AirbyteMessage.class); + if (oldMessage.getType() == io.airbyte.protocol.models.v0.AirbyteMessage.Type.CATALOG && oldMessage.getCatalog() != null) { + for (final AirbyteStream stream : newMessage.getCatalog().getStreams()) { + final JsonNode schema = stream.getJsonSchema(); + SchemaMigrationV1.upgradeSchema(schema); + } + } else if (oldMessage.getType() == io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD && oldMessage.getRecord() != null) { + final JsonNode oldData = newMessage.getRecord().getData(); + final JsonNode newData = upgradeRecord(oldData); + newMessage.getRecord().setData(newData); + } + return newMessage; + } + + /** + * Returns a copy of oldData, with numeric values converted to strings. String and boolean values + * are returned as-is for convenience, i.e. this is not a true deep copy. 
+ */ + private static JsonNode upgradeRecord(final JsonNode oldData) { + if (oldData.isNumber()) { + // Base case: convert numbers to strings + return Jsons.convertValue(oldData.asText(), TextNode.class); + } else if (oldData.isObject()) { + // Recurse into each field of the object + final ObjectNode newData = (ObjectNode) Jsons.emptyObject(); + + final Iterator> fieldsIterator = oldData.fields(); + while (fieldsIterator.hasNext()) { + final Entry next = fieldsIterator.next(); + final String key = next.getKey(); + final JsonNode value = next.getValue(); + + final JsonNode newValue = upgradeRecord(value); + newData.set(key, newValue); + } + + return newData; + } else if (oldData.isArray()) { + // Recurse into each element of the array + final ArrayNode newData = Jsons.arrayNode(); + for (final JsonNode element : oldData) { + newData.add(upgradeRecord(element)); + } + return newData; + } else { + // Base case: this is a string or boolean, so we don't need to modify it + return oldData; + } + } + + /** + * We need the schema to recognize which fields are integers, since it would be wrong to just assume + * any numerical string should be parsed out. + * + * Works on a best-effort basis. If the schema doesn't match the data, we'll do our best to + * downgrade anything that we can definitively say is a number. Should _not_ throw an exception if + * bad things happen (e.g. we try to parse a non-numerical string as a number). + */ + private MigratedNode downgradeRecord(final JsonNode data, final JsonNode schema) { + return RecordMigrations.mutateDataNode( + validator, + s -> { + if (s.hasNonNull(REF_KEY)) { + final String type = s.get(REF_KEY).asText(); + return JsonSchemaReferenceTypes.INTEGER_REFERENCE.equals(type) + || JsonSchemaReferenceTypes.NUMBER_REFERENCE.equals(type); + } else { + return false; + } + }, + (s, d) -> { + if (d.asText().matches("-?\\d+(\\.\\d+)?")) { + // If this string is a numeric literal, convert it to a numeric node. 
+ return new MigratedNode(Jsons.deserialize(d.asText()), true); + } else { + // Otherwise, just leave the node unchanged. + return new MigratedNode(d, false); + } + }, + data, schema); + } + + @Override + public Version getPreviousVersion() { + return AirbyteProtocolVersion.V0; + } + + @Override + public Version getCurrentVersion() { + return AirbyteProtocolVersion.V1; + } + +} diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/ConfiguredAirbyteCatalogMigrationV1.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/ConfiguredAirbyteCatalogMigrationV1.java new file mode 100644 index 0000000000000..e400b31724b70 --- /dev/null +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/ConfiguredAirbyteCatalogMigrationV1.java @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.commons.protocol.migrations.v1; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.protocol.migrations.ConfiguredAirbyteCatalogMigration; +import io.airbyte.commons.version.AirbyteProtocolVersion; +import io.airbyte.commons.version.Version; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import jakarta.inject.Singleton; + +@Singleton +public class ConfiguredAirbyteCatalogMigrationV1 + implements ConfiguredAirbyteCatalogMigration { + + @Override + public io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog downgrade(final ConfiguredAirbyteCatalog oldMessage) { + final io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog newMessage = Jsons.object( + Jsons.jsonNode(oldMessage), + io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog.class); + for (final io.airbyte.protocol.models.v0.ConfiguredAirbyteStream stream : newMessage.getStreams()) { + final JsonNode schema = 
stream.getStream().getJsonSchema(); + SchemaMigrationV1.downgradeSchema(schema); + } + return newMessage; + } + + @Override + public ConfiguredAirbyteCatalog upgrade(final io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog oldMessage) { + final ConfiguredAirbyteCatalog newMessage = Jsons.object( + Jsons.jsonNode(oldMessage), + ConfiguredAirbyteCatalog.class); + for (final ConfiguredAirbyteStream stream : newMessage.getStreams()) { + final JsonNode schema = stream.getStream().getJsonSchema(); + SchemaMigrationV1.upgradeSchema(schema); + } + return newMessage; + } + + @Override + public Version getPreviousVersion() { + return AirbyteProtocolVersion.V0; + } + + @Override + public Version getCurrentVersion() { + return AirbyteProtocolVersion.V1; + } + +} diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/SchemaMigrationV1.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/SchemaMigrationV1.java new file mode 100644 index 0000000000000..6cbc37e2639b6 --- /dev/null +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/SchemaMigrationV1.java @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.commons.protocol.migrations.v1; + +import static io.airbyte.protocol.models.JsonSchemaReferenceTypes.ONEOF_KEY; +import static io.airbyte.protocol.models.JsonSchemaReferenceTypes.REF_KEY; +import static io.airbyte.protocol.models.JsonSchemaReferenceTypes.TYPE_KEY; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.protocol.migrations.util.SchemaMigrations; +import io.airbyte.protocol.models.JsonSchemaReferenceTypes; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map.Entry; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.stream.StreamSupport; + +public class SchemaMigrationV1 { + + /** + * Perform the {type: foo} -> {$ref: foo} upgrade. Modifies the schema in-place. + */ + public static void upgradeSchema(final JsonNode schema) { + SchemaMigrations.mutateSchemas( + SchemaMigrationV1::isPrimitiveTypeDeclaration, + SchemaMigrationV1::upgradeTypeDeclaration, + schema); + } + + /** + * Perform the {$ref: foo} -> {type: foo} downgrade. Modifies the schema in-place. 
+ */ + public static void downgradeSchema(final JsonNode schema) { + SchemaMigrations.mutateSchemas( + SchemaMigrationV1::isPrimitiveReferenceTypeDeclaration, + SchemaMigrationV1::downgradeTypeDeclaration, + schema); + } + + /** + * Detects any schema that looks like a primitive type declaration, e.g.: { "type": "string" } or { + * "type": ["string", "object"] } + */ + private static boolean isPrimitiveTypeDeclaration(final JsonNode schema) { + if (!schema.isObject() || !schema.hasNonNull(TYPE_KEY)) { + return false; + } + final JsonNode typeNode = schema.get(TYPE_KEY); + if (typeNode.isArray()) { + return StreamSupport.stream(typeNode.spliterator(), false) + .anyMatch(n -> JsonSchemaReferenceTypes.PRIMITIVE_JSON_TYPES.contains(n.asText())); + } else { + return JsonSchemaReferenceTypes.PRIMITIVE_JSON_TYPES.contains(typeNode.asText()); + } + } + + /** + * Detects any schema that looks like a reference type declaration, e.g.: { "$ref": + * "WellKnownTypes.json...." } or { "oneOf": [{"$ref": "..."}, {"type": "object"}] } + */ + private static boolean isPrimitiveReferenceTypeDeclaration(final JsonNode schema) { + if (!schema.isObject()) { + // Non-object schemas (i.e. true/false) never need to be modified + return false; + } else if (schema.hasNonNull(REF_KEY) && schema.get(REF_KEY).asText().startsWith("WellKnownTypes.json")) { + // If this schema has a $ref, then we need to convert it back to type/airbyte_type/format + return true; + } else if (schema.hasNonNull(ONEOF_KEY)) { + // If this is a oneOf with at least one primitive $ref option, then we should consider converting it + // back + final List subschemas = getSubschemas(schema, ONEOF_KEY); + return subschemas.stream().anyMatch( + subschema -> subschema.hasNonNull(REF_KEY) + && subschema.get(REF_KEY).asText().startsWith("WellKnownTypes.json")); + } else { + return false; + } + } + + /** + * Modifies the schema in-place to upgrade from the old-style type declaration to the new-style $ref + * declaration. 
Assumes that the schema is an ObjectNode containing a primitive declaration, i.e. + * either something like: {"type": "string"} or: {"type": ["string", "object"]} + *

+ * In the latter case, the schema may contain subschemas. This method mutually recurses with + * {@link SchemaMigrations#mutateSchemas(Function, Consumer, JsonNode)} to upgrade those subschemas. + * + * @param schema An ObjectNode representing a primitive type declaration + */ + private static void upgradeTypeDeclaration(final JsonNode schema) { + final ObjectNode schemaNode = (ObjectNode) schema; + + if (schemaNode.hasNonNull("airbyte_type")) { + // If airbyte_type is defined, always respect it + final String referenceType = JsonSchemaReferenceTypes.LEGACY_AIRBYTE_PROPERY_TO_REFERENCE.get(schemaNode.get("airbyte_type").asText()); + schemaNode.removeAll(); + schemaNode.put(REF_KEY, referenceType); + } else { + // Otherwise, fall back to type/format + final JsonNode typeNode = schemaNode.get(TYPE_KEY); + if (typeNode.isTextual()) { + // If the type is a single string, then replace this node with the appropriate reference type + final String type = typeNode.asText(); + final String referenceType = getReferenceType(type, schemaNode); + schemaNode.removeAll(); + schemaNode.put(REF_KEY, referenceType); + } else { + // If type is an array of strings, then things are more complicated + final List types = StreamSupport.stream(typeNode.spliterator(), false) + .map(JsonNode::asText) + // Everything is implicitly nullable by just not declaring the `required `field + // so filter out any explicit null types + .filter(type -> !"null".equals(type)) + .toList(); + final boolean exactlyOneType = types.size() == 1; + if (exactlyOneType) { + // If there's only one type, e.g. {type: [string]}, just treat that as equivalent to {type: string} + final String type = types.get(0); + final String referenceType = getReferenceType(type, schemaNode); + schemaNode.removeAll(); + schemaNode.put(REF_KEY, referenceType); + } else { + // If there are multiple types, we'll need to convert this to a oneOf. 
+ // For arrays and objects, we do a mutual recursion back into mutateSchemas to upgrade their + // subschemas. + final ArrayNode oneOfOptions = Jsons.arrayNode(); + for (final String type : types) { + final ObjectNode option = (ObjectNode) Jsons.emptyObject(); + switch (type) { + case "array" -> { + option.put(TYPE_KEY, "array"); + copyKey(schemaNode, option, "items"); + copyKey(schemaNode, option, "additionalItems"); + copyKey(schemaNode, option, "contains"); + upgradeSchema(option); + } + case "object" -> { + option.put(TYPE_KEY, "object"); + copyKey(schemaNode, option, "properties"); + copyKey(schemaNode, option, "patternProperties"); + copyKey(schemaNode, option, "additionalProperties"); + upgradeSchema(option); + } + default -> { + final String referenceType = getReferenceType(type, schemaNode); + option.put(REF_KEY, referenceType); + } + } + oneOfOptions.add(option); + } + schemaNode.removeAll(); + schemaNode.set(ONEOF_KEY, oneOfOptions); + } + } + } + } + + /** + * Modifies the schema in-place to downgrade from the new-style $ref declaration to the old-style + * type declaration. Assumes that the schema is an ObjectNode containing a primitive declaration, + * i.e. either something like: {"$ref": "WellKnownTypes..."} or: {"oneOf": [{"$ref": + * "WellKnownTypes..."}, ...]} + *

+ * In the latter case, the schema may contain subschemas. This method mutually recurses with + * {@link SchemaMigrations#mutateSchemas(Function, Consumer, JsonNode)} to downgrade those + * subschemas. + * + * @param schema An ObjectNode representing a primitive type declaration + */ + private static void downgradeTypeDeclaration(final JsonNode schema) { + if (schema.hasNonNull(REF_KEY)) { + // If this is a direct type declaration, then we can just replace it with the old-style declaration + final String referenceType = schema.get(REF_KEY).asText(); + ((ObjectNode) schema).removeAll(); + ((ObjectNode) schema).setAll(JsonSchemaReferenceTypes.REFERENCE_TYPE_TO_OLD_TYPE.get(referenceType)); + } else if (schema.hasNonNull(ONEOF_KEY)) { + // If this is a oneOf, then we need to check whether we can recombine it into a single type + // declaration. + // This means we must do three things: + // 1. Downgrade each subschema + // 2. Build a new `type` array, containing the `type` of each subschema + // 3. Combine all the fields in each subschema (properties, items, etc) + // If any two subschemas have the same `type`, or the same field, then we can't combine them, but we + // should still downgrade them. + // See V0ToV1MigrationTest.CatalogDowngradeTest#testDowngradeMultiTypeFields for some examples. + + // We'll build up a node containing the combined subschemas. + final ObjectNode replacement = (ObjectNode) Jsons.emptyObject(); + // As part of this, we need to build up a list of `type` entries. For ease of access, we'll keep it + // in a List. + final List types = new ArrayList<>(); + + boolean canRecombineSubschemas = true; + for (final JsonNode subschemaNode : schema.get(ONEOF_KEY)) { + // No matter what - we always need to downgrade the subschema node. + downgradeSchema(subschemaNode); + + if (subschemaNode instanceof ObjectNode subschema) { + // If this subschema is an object, then we can attempt to combine it with the other subschemas. 
+ + // First, update our list of types. + final JsonNode subschemaType = subschema.get(TYPE_KEY); + if (subschemaType != null) { + if (types.contains(subschemaType.asText())) { + // If another subschema has the same type, then we can't combine them. + canRecombineSubschemas = false; + } else { + types.add(subschemaType.asText()); + } + } + + // Then, update the combined schema with this subschema's fields. + if (canRecombineSubschemas) { + final Iterator> fields = subschema.fields(); + while (fields.hasNext()) { + final Entry field = fields.next(); + if (TYPE_KEY.equals(field.getKey())) { + // We're handling the `type` field outside this loop, so ignore it here. + continue; + } + if (replacement.has(field.getKey())) { + // A previous subschema is already using this field, so we should stop trying to combine them. + canRecombineSubschemas = false; + break; + } else { + replacement.set(field.getKey(), field.getValue()); + } + } + } + } else { + // If this subschema is a boolean, then the oneOf is doing something funky, and we shouldn't attempt + // to + // combine it into a single type entry + canRecombineSubschemas = false; + } + } + + if (canRecombineSubschemas) { + // Update our replacement node with the full list of types + final ArrayNode typeNode = Jsons.arrayNode(); + types.forEach(typeNode::add); + replacement.set(TYPE_KEY, typeNode); + + // And commit our changes to the actual schema node + ((ObjectNode) schema).removeAll(); + ((ObjectNode) schema).setAll(replacement); + } + } + } + + private static void copyKey(final ObjectNode source, final ObjectNode target, final String key) { + if (source.hasNonNull(key)) { + target.set(key, source.get(key)); + } + } + + /** + * Given a primitive (string/int/num/bool) type declaration _without_ an airbyte_type, get the + * appropriate $ref type. In most cases, this only depends on the "type" key. When type=string, also + * checks the "format" key. 
+ */ + private static String getReferenceType(final String type, final ObjectNode schemaNode) { + return switch (type) { + case "string" -> { + if (schemaNode.hasNonNull("format")) { + yield switch (schemaNode.get("format").asText()) { + case "date" -> JsonSchemaReferenceTypes.DATE_REFERENCE; + // In these two cases, we default to the "with timezone" type, rather than "without timezone". + // This matches existing behavior in normalization. + case "date-time" -> JsonSchemaReferenceTypes.TIMESTAMP_WITH_TIMEZONE_REFERENCE; + case "time" -> JsonSchemaReferenceTypes.TIME_WITH_TIMEZONE_REFERENCE; + // If we don't recognize the format, just use a plain string + default -> JsonSchemaReferenceTypes.STRING_REFERENCE; + }; + } else if (schemaNode.hasNonNull("contentEncoding")) { + if ("base64".equals(schemaNode.get("contentEncoding").asText())) { + yield JsonSchemaReferenceTypes.BINARY_DATA_REFERENCE; + } else { + yield JsonSchemaReferenceTypes.STRING_REFERENCE; + } + } else { + yield JsonSchemaReferenceTypes.STRING_REFERENCE; + } + } + case "integer" -> JsonSchemaReferenceTypes.INTEGER_REFERENCE; + case "number" -> JsonSchemaReferenceTypes.NUMBER_REFERENCE; + case "boolean" -> JsonSchemaReferenceTypes.BOOLEAN_REFERENCE; + // This is impossible, because we'll only call this method on string/integer/number/boolean + default -> throw new IllegalStateException("Somehow got non-primitive type: " + type + " for schema: " + schemaNode); + }; + } + + private static List getSubschemas(final JsonNode schema, final String key) { + final List subschemas = new ArrayList<>(); + SchemaMigrations.findSubschemas(subschemas, schema, key); + return subschemas; + } + +} diff --git a/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/migrations/v1/AirbyteMessageMigrationV1Test.java b/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/migrations/v1/AirbyteMessageMigrationV1Test.java new file mode 100644 index 0000000000000..f7f081aa63283 --- /dev/null +++ 
b/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/migrations/v1/AirbyteMessageMigrationV1Test.java @@ -0,0 +1,1633 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.commons.protocol.migrations.v1; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.resources.MoreResources; +import io.airbyte.protocol.models.AirbyteCatalog; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteMessage.Type; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import io.airbyte.validation.json.JsonSchemaValidator; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.List; +import java.util.Optional; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +// most of these tests rely on a doTest utility method for brevity, which hides the assertion. 
+@SuppressWarnings("PMD.JUnitTestsShouldIncludeAssert") +class AirbyteMessageMigrationV1Test { + + JsonSchemaValidator validator; + private AirbyteMessageMigrationV1 migration; + + @BeforeEach + void setup() throws URISyntaxException { + // TODO this should probably just get generated as part of the airbyte-protocol build, and + // airbyte-workers / airbyte-commons-protocol would reference it directly + final URI parentUri = MoreResources.readResourceAsFile("WellKnownTypes.json").getAbsoluteFile().toURI(); + validator = new JsonSchemaValidator(parentUri); + migration = new AirbyteMessageMigrationV1(validator); + } + + @Test + void testVersionMetadata() { + assertEquals("0.3.0", migration.getPreviousVersion().serialize()); + assertEquals("1.0.0", migration.getCurrentVersion().serialize()); + } + + @Nested + class CatalogUpgradeTest { + + @Test + void testBasicUpgrade() { + // This isn't actually a valid stream schema (since it's not an object) + // but this test case is mostly about preserving the message structure, so it's not super relevant + final JsonNode oldSchema = Jsons.deserialize( + """ + { + "type": "string" + } + """); + + final AirbyteMessage upgradedMessage = migration.upgrade(createCatalogMessage(oldSchema), Optional.empty()); + + final AirbyteMessage expectedMessage = Jsons.deserialize( + """ + { + "type": "CATALOG", + "catalog": { + "streams": [ + { + "json_schema": { + "$ref": "WellKnownTypes.json#/definitions/String" + } + } + ] + } + } + """, + AirbyteMessage.class); + assertEquals(expectedMessage, upgradedMessage); + } + + @Test + void testNullUpgrade() { + final io.airbyte.protocol.models.v0.AirbyteMessage oldMessage = new io.airbyte.protocol.models.v0.AirbyteMessage() + .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.CATALOG); + final AirbyteMessage upgradedMessage = migration.upgrade(oldMessage, Optional.empty()); + final AirbyteMessage expectedMessage = new AirbyteMessage().withType(Type.CATALOG); + assertEquals(expectedMessage, 
upgradedMessage); + } + + /** + * Utility method to upgrade the oldSchema, and assert that the result is equal to expectedSchema + * + * @param oldSchemaString The schema to be upgraded + * @param expectedSchemaString The expected schema after upgrading + */ + private void doTest(final String oldSchemaString, final String expectedSchemaString) { + final JsonNode oldSchema = Jsons.deserialize(oldSchemaString); + + final AirbyteMessage upgradedMessage = migration.upgrade(createCatalogMessage(oldSchema), Optional.empty()); + + final JsonNode expectedSchema = Jsons.deserialize(expectedSchemaString); + assertEquals(expectedSchema, upgradedMessage.getCatalog().getStreams().get(0).getJsonSchema()); + } + + @Test + void testUpgradeAllPrimitives() { + doTest( + """ + { + "type": "object", + "properties": { + "example_string": { + "type": "string" + }, + "example_number": { + "type": "number" + }, + "example_integer": { + "type": "integer" + }, + "example_airbyte_integer": { + "type": "number", + "airbyte_type": "integer" + }, + "example_boolean": { + "type": "boolean" + }, + "example_timestamptz": { + "type": "string", + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" + }, + "example_timestamptz_implicit": { + "type": "string", + "format": "date-time" + }, + "example_timestamp_without_tz": { + "type": "string", + "format": "date-time", + "airbyte_type": "timestamp_without_timezone" + }, + "example_timez": { + "type": "string", + "format": "time", + "airbyte_type": "time_with_timezone" + }, + "example_timetz_implicit": { + "type": "string", + "format": "time" + }, + "example_time_without_tz": { + "type": "string", + "format": "time", + "airbyte_type": "time_without_timezone" + }, + "example_date": { + "type": "string", + "format": "date" + }, + "example_binary": { + "type": "string", + "contentEncoding": "base64" + } + } + } + """, + """ + { + "type": "object", + "properties": { + "example_string": { + "$ref": "WellKnownTypes.json#/definitions/String" + }, 
+ "example_number": { + "$ref": "WellKnownTypes.json#/definitions/Number" + }, + "example_integer": { + "$ref": "WellKnownTypes.json#/definitions/Integer" + }, + "example_airbyte_integer": { + "$ref": "WellKnownTypes.json#/definitions/Integer" + }, + "example_boolean": { + "$ref": "WellKnownTypes.json#/definitions/Boolean" + }, + "example_timestamptz": { + "$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone" + }, + "example_timestamptz_implicit": { + "$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone" + }, + "example_timestamp_without_tz": { + "$ref": "WellKnownTypes.json#/definitions/TimestampWithoutTimezone" + }, + "example_timez": { + "$ref": "WellKnownTypes.json#/definitions/TimeWithTimezone" + }, + "example_timetz_implicit": { + "$ref": "WellKnownTypes.json#/definitions/TimeWithTimezone" + }, + "example_time_without_tz": { + "$ref": "WellKnownTypes.json#/definitions/TimeWithoutTimezone" + }, + "example_date": { + "$ref": "WellKnownTypes.json#/definitions/Date" + }, + "example_binary": { + "$ref": "WellKnownTypes.json#/definitions/BinaryData" + } + } + } + """); + } + + @Test + void testUpgradeNestedFields() { + doTest( + """ + { + "type": "object", + "properties": { + "basic_array": { + "items": {"type": "string"} + }, + "tuple_array": { + "items": [ + {"type": "string"}, + {"type": "integer"} + ], + "additionalItems": {"type": "string"}, + "contains": {"type": "integer"} + }, + "nested_object": { + "properties": { + "id": {"type": "integer"}, + "nested_oneof": { + "oneOf": [ + {"type": "string"}, + {"type": "integer"} + ] + }, + "nested_anyof": { + "anyOf": [ + {"type": "string"}, + {"type": "integer"} + ] + }, + "nested_allof": { + "allOf": [ + {"type": "string"}, + {"type": "integer"} + ] + }, + "nested_not": { + "not": [ + {"type": "string"}, + {"type": "integer"} + ] + } + }, + "patternProperties": { + "integer_.*": {"type": "integer"} + }, + "additionalProperties": {"type": "string"} + } + } + } + """, + """ + { + "type": 
"object", + "properties": { + "basic_array": { + "items": {"$ref": "WellKnownTypes.json#/definitions/String"} + }, + "tuple_array": { + "items": [ + {"$ref": "WellKnownTypes.json#/definitions/String"}, + {"$ref": "WellKnownTypes.json#/definitions/Integer"} + ], + "additionalItems": {"$ref": "WellKnownTypes.json#/definitions/String"}, + "contains": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + }, + "nested_object": { + "properties": { + "id": {"$ref": "WellKnownTypes.json#/definitions/Integer"}, + "nested_oneof": { + "oneOf": [ + {"$ref": "WellKnownTypes.json#/definitions/String"}, + {"$ref": "WellKnownTypes.json#/definitions/Integer"} + ] + }, + "nested_anyof": { + "anyOf": [ + {"$ref": "WellKnownTypes.json#/definitions/String"}, + {"$ref": "WellKnownTypes.json#/definitions/Integer"} + ] + }, + "nested_allof": { + "allOf": [ + {"$ref": "WellKnownTypes.json#/definitions/String"}, + {"$ref": "WellKnownTypes.json#/definitions/Integer"} + ] + }, + "nested_not": { + "not": [ + {"$ref": "WellKnownTypes.json#/definitions/String"}, + {"$ref": "WellKnownTypes.json#/definitions/Integer"} + ] + } + }, + "patternProperties": { + "integer_.*": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + }, + "additionalProperties": {"$ref": "WellKnownTypes.json#/definitions/String"} + } + } + } + """); + } + + @Test + void testUpgradeBooleanSchemas() { + // Most of these should never happen in reality, but let's handle them just in case + // The only ones that we're _really_ expecting are additionalItems and additionalProperties + final String schemaString = """ + { + "type": "object", + "properties": { + "basic_array": { + "items": true + }, + "tuple_array": { + "items": [true], + "additionalItems": true, + "contains": true + }, + "nested_object": { + "properties": { + "id": true, + "nested_oneof": { + "oneOf": [true] + }, + "nested_anyof": { + "anyOf": [true] + }, + "nested_allof": { + "allOf": [true] + }, + "nested_not": { + "not": [true] + } + }, + "patternProperties": { 
+ "integer_.*": true + }, + "additionalProperties": true + } + } + } + """; + doTest(schemaString, schemaString); + } + + @Test + void testUpgradeEmptySchema() { + // Sources shouldn't do this, but we should have handling for it anyway, since it's not currently + // enforced by SATs + final String schemaString = """ + { + "type": "object", + "properties": { + "basic_array": { + "items": {} + }, + "tuple_array": { + "items": [{}], + "additionalItems": {}, + "contains": {} + }, + "nested_object": { + "properties": { + "id": {}, + "nested_oneof": { + "oneOf": [{}] + }, + "nested_anyof": { + "anyOf": [{}] + }, + "nested_allof": { + "allOf": [{}] + }, + "nested_not": { + "not": [{}] + } + }, + "patternProperties": { + "integer_.*": {} + }, + "additionalProperties": {} + } + } + } + """; + doTest(schemaString, schemaString); + } + + @Test + void testUpgradeLiteralSchema() { + // Verify that we do _not_ recurse into places we shouldn't + final String schemaString = """ + { + "type": "object", + "properties": { + "example_schema": { + "type": "object", + "default": {"type": "string"}, + "enum": [{"type": "string"}], + "const": {"type": "string"} + } + } + } + """; + doTest(schemaString, schemaString); + } + + @Test + void testUpgradeMalformedSchemas() { + // These schemas are "wrong" in some way. For example, normalization will currently treat + // bad_timestamptz as a string timestamp_with_timezone, + // i.e. it will disregard the option for a boolean. + // Generating this sort of schema is just wrong; sources shouldn't do this to begin with. But let's + // verify that we behave mostly correctly here. 
+ doTest( + """ + { + "type": "object", + "properties": { + "bad_timestamptz": { + "type": ["boolean", "string"], + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" + }, + "bad_integer": { + "type": "string", + "format": "date-time", + "airbyte_type": "integer" + } + } + } + """, + """ + { + "type": "object", + "properties": { + "bad_timestamptz": {"$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone"}, + "bad_integer": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + } + } + """); + } + + @Test + void testUpgradeMultiTypeFields() { + doTest( + """ + { + "type": "object", + "properties": { + "multityped_field": { + "type": ["string", "object", "array"], + "properties": { + "id": {"type": "string"} + }, + "patternProperties": { + "integer_.*": {"type": "integer"} + }, + "additionalProperties": {"type": "string"}, + "items": {"type": "string"}, + "additionalItems": {"type": "string"}, + "contains": {"type": "string"} + }, + "nullable_multityped_field": { + "type": ["null", "string", "array", "object"], + "items": [{"type": "string"}, {"type": "integer"}], + "properties": { + "id": {"type": "integer"} + } + }, + "multityped_date_field": { + "type": ["string", "integer"], + "format": "date" + }, + "sneaky_singletype_field": { + "type": ["string", "null"], + "format": "date-time" + } + } + } + """, + """ + { + "type": "object", + "properties": { + "multityped_field": { + "oneOf": [ + {"$ref": "WellKnownTypes.json#/definitions/String"}, + { + "type": "object", + "properties": { + "id": {"$ref": "WellKnownTypes.json#/definitions/String"} + }, + "patternProperties": { + "integer_.*": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + }, + "additionalProperties": {"$ref": "WellKnownTypes.json#/definitions/String"} + }, + { + "type": "array", + "items": {"$ref": "WellKnownTypes.json#/definitions/String"}, + "additionalItems": {"$ref": "WellKnownTypes.json#/definitions/String"}, + "contains": {"$ref": 
"WellKnownTypes.json#/definitions/String"} + } + ] + }, + "nullable_multityped_field": { + "oneOf": [ + {"$ref": "WellKnownTypes.json#/definitions/String"}, + { + "type": "array", + "items": [ + {"$ref": "WellKnownTypes.json#/definitions/String"}, + {"$ref": "WellKnownTypes.json#/definitions/Integer"} + ] + }, + { + "type": "object", + "properties": { + "id": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + } + } + ] + }, + "multityped_date_field": { + "oneOf": [ + {"$ref": "WellKnownTypes.json#/definitions/Date"}, + {"$ref": "WellKnownTypes.json#/definitions/Integer"} + ] + }, + "sneaky_singletype_field": {"$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone"} + } + } + """); + } + + private io.airbyte.protocol.models.v0.AirbyteMessage createCatalogMessage(final JsonNode schema) { + return new io.airbyte.protocol.models.v0.AirbyteMessage().withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.CATALOG) + .withCatalog( + new io.airbyte.protocol.models.v0.AirbyteCatalog().withStreams(List.of(new io.airbyte.protocol.models.v0.AirbyteStream().withJsonSchema( + schema)))); + } + + } + + @Nested + class RecordUpgradeTest { + + @Test + void testBasicUpgrade() { + final JsonNode oldData = Jsons.deserialize( + """ + { + "id": 42 + } + """); + + final AirbyteMessage upgradedMessage = migration.upgrade(createRecordMessage(oldData), Optional.empty()); + + final AirbyteMessage expectedMessage = Jsons.deserialize( + """ + { + "type": "RECORD", + "record": { + "data": { + "id": "42" + } + } + } + """, + AirbyteMessage.class); + assertEquals(expectedMessage, upgradedMessage); + } + + @Test + void testNullUpgrade() { + final io.airbyte.protocol.models.v0.AirbyteMessage oldMessage = new io.airbyte.protocol.models.v0.AirbyteMessage() + .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD); + final AirbyteMessage upgradedMessage = migration.upgrade(oldMessage, Optional.empty()); + final AirbyteMessage expectedMessage = new 
AirbyteMessage().withType(Type.RECORD); + assertEquals(expectedMessage, upgradedMessage); + } + + /** + * Utility method to upgrade the oldData, and assert that the result is equal to expectedData + * + * @param oldDataString The data of the record to be upgraded + * @param expectedDataString The expected data after upgrading + */ + private void doTest(final String oldDataString, final String expectedDataString) { + final JsonNode oldData = Jsons.deserialize(oldDataString); + + final AirbyteMessage upgradedMessage = migration.upgrade(createRecordMessage(oldData), Optional.empty()); + + final JsonNode expectedData = Jsons.deserialize(expectedDataString); + assertEquals(expectedData, upgradedMessage.getRecord().getData()); + } + + @Test + void testNestedUpgrade() { + doTest( + """ + { + "int": 42, + "float": 42.0, + "float2": 42.2, + "sub_object": { + "sub_int": 42, + "sub_float": 42.0, + "sub_float2": 42.2 + }, + "sub_array": [42, 42.0, 42.2] + } + """, + """ + { + "int": "42", + "float": "42.0", + "float2": "42.2", + "sub_object": { + "sub_int": "42", + "sub_float": "42.0", + "sub_float2": "42.2" + }, + "sub_array": ["42", "42.0", "42.2"] + } + """); + } + + @Test + void testNonUpgradableValues() { + doTest( + """ + { + "boolean": true, + "string": "arst", + "sub_object": { + "boolean": true, + "string": "arst" + }, + "sub_array": [true, "arst"] + } + """, + """ + { + "boolean": true, + "string": "arst", + "sub_object": { + "boolean": true, + "string": "arst" + }, + "sub_array": [true, "arst"] + } + """); + } + + private io.airbyte.protocol.models.v0.AirbyteMessage createRecordMessage(final JsonNode data) { + return new io.airbyte.protocol.models.v0.AirbyteMessage().withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD) + .withRecord(new io.airbyte.protocol.models.v0.AirbyteRecordMessage().withData(data)); + } + + } + + @Nested + class CatalogDowngradeTest { + + @Test + void testBasicDowngrade() { + // This isn't actually a valid stream schema (since 
it's not an object) + // but this test case is mostly about preserving the message structure, so it's not super relevant + final JsonNode newSchema = Jsons.deserialize( + """ + { + "$ref": "WellKnownTypes.json#/definitions/String" + } + """); + + final io.airbyte.protocol.models.v0.AirbyteMessage downgradedMessage = migration.downgrade(createCatalogMessage(newSchema), Optional.empty()); + + final io.airbyte.protocol.models.v0.AirbyteMessage expectedMessage = Jsons.deserialize( + """ + { + "type": "CATALOG", + "catalog": { + "streams": [ + { + "json_schema": { + "type": "string" + } + } + ] + } + } + """, + io.airbyte.protocol.models.v0.AirbyteMessage.class); + assertEquals(expectedMessage, downgradedMessage); + } + + @Test + void testNullDowngrade() { + final AirbyteMessage oldMessage = new AirbyteMessage().withType(Type.CATALOG); + final io.airbyte.protocol.models.v0.AirbyteMessage upgradedMessage = migration.downgrade(oldMessage, Optional.empty()); + final io.airbyte.protocol.models.v0.AirbyteMessage expectedMessage = new io.airbyte.protocol.models.v0.AirbyteMessage() + .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.CATALOG); + assertEquals(expectedMessage, upgradedMessage); + } + + /** + * Utility method to downgrade the oldSchema, and assert that the result is equal to expectedSchema + * + * @param oldSchemaString The schema to be downgraded + * @param expectedSchemaString The expected schema after downgrading + */ + private void doTest(final String oldSchemaString, final String expectedSchemaString) { + final JsonNode oldSchema = Jsons.deserialize(oldSchemaString); + + final io.airbyte.protocol.models.v0.AirbyteMessage downgradedMessage = migration.downgrade(createCatalogMessage(oldSchema), Optional.empty()); + + final JsonNode expectedSchema = Jsons.deserialize(expectedSchemaString); + assertEquals(expectedSchema, downgradedMessage.getCatalog().getStreams().get(0).getJsonSchema()); + } + + @Test + void testDowngradeAllPrimitives() { + doTest( + 
""" + { + "type": "object", + "properties": { + "example_string": { + "$ref": "WellKnownTypes.json#/definitions/String" + }, + "example_number": { + "$ref": "WellKnownTypes.json#/definitions/Number" + }, + "example_integer": { + "$ref": "WellKnownTypes.json#/definitions/Integer" + }, + "example_boolean": { + "$ref": "WellKnownTypes.json#/definitions/Boolean" + }, + "example_timestamptz": { + "$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone" + }, + "example_timestamp_without_tz": { + "$ref": "WellKnownTypes.json#/definitions/TimestampWithoutTimezone" + }, + "example_timez": { + "$ref": "WellKnownTypes.json#/definitions/TimeWithTimezone" + }, + "example_time_without_tz": { + "$ref": "WellKnownTypes.json#/definitions/TimeWithoutTimezone" + }, + "example_date": { + "$ref": "WellKnownTypes.json#/definitions/Date" + }, + "example_binary": { + "$ref": "WellKnownTypes.json#/definitions/BinaryData" + } + } + } + """, + """ + { + "type": "object", + "properties": { + "example_string": { + "type": "string" + }, + "example_number": { + "type": "number" + }, + "example_integer": { + "type": "number", + "airbyte_type": "integer" + }, + "example_boolean": { + "type": "boolean" + }, + "example_timestamptz": { + "type": "string", + "airbyte_type": "timestamp_with_timezone", + "format": "date-time" + }, + "example_timestamp_without_tz": { + "type": "string", + "airbyte_type": "timestamp_without_timezone", + "format": "date-time" + }, + "example_timez": { + "type": "string", + "airbyte_type": "time_with_timezone", + "format": "time" + }, + "example_time_without_tz": { + "type": "string", + "airbyte_type": "time_without_timezone", + "format": "time" + }, + "example_date": { + "type": "string", + "format": "date" + }, + "example_binary": { + "type": "string", + "contentEncoding": "base64" + } + } + } + """); + } + + @Test + void testDowngradeNestedFields() { + doTest( + """ + { + "type": "object", + "properties": { + "basic_array": { + "items": {"$ref": 
"WellKnownTypes.json#/definitions/String"} + }, + "tuple_array": { + "items": [ + {"$ref": "WellKnownTypes.json#/definitions/String"}, + {"$ref": "WellKnownTypes.json#/definitions/Integer"} + ], + "additionalItems": {"$ref": "WellKnownTypes.json#/definitions/String"}, + "contains": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + }, + "nested_object": { + "properties": { + "id": {"$ref": "WellKnownTypes.json#/definitions/Integer"}, + "nested_oneof": { + "oneOf": [ + {"$ref": "WellKnownTypes.json#/definitions/String"}, + {"$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone"} + ] + }, + "nested_anyof": { + "anyOf": [ + {"$ref": "WellKnownTypes.json#/definitions/String"}, + {"$ref": "WellKnownTypes.json#/definitions/Integer"} + ] + }, + "nested_allof": { + "allOf": [ + {"$ref": "WellKnownTypes.json#/definitions/String"}, + {"$ref": "WellKnownTypes.json#/definitions/Integer"} + ] + }, + "nested_not": { + "not": [ + {"$ref": "WellKnownTypes.json#/definitions/String"}, + {"$ref": "WellKnownTypes.json#/definitions/Integer"} + ] + } + }, + "patternProperties": { + "integer_.*": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + }, + "additionalProperties": {"$ref": "WellKnownTypes.json#/definitions/String"} + } + } + } + """, + """ + { + "type": "object", + "properties": { + "basic_array": { + "items": {"type": "string"} + }, + "tuple_array": { + "items": [ + {"type": "string"}, + {"type": "number", "airbyte_type": "integer"} + ], + "additionalItems": {"type": "string"}, + "contains": {"type": "number", "airbyte_type": "integer"} + }, + "nested_object": { + "properties": { + "id": {"type": "number", "airbyte_type": "integer"}, + "nested_oneof": { + "oneOf": [ + {"type": "string"}, + {"type": "string", "format": "date-time", "airbyte_type": "timestamp_with_timezone"} + ] + }, + "nested_anyof": { + "anyOf": [ + {"type": "string"}, + {"type": "number", "airbyte_type": "integer"} + ] + }, + "nested_allof": { + "allOf": [ + {"type": "string"}, + {"type": 
"number", "airbyte_type": "integer"} + ] + }, + "nested_not": { + "not": [ + {"type": "string"}, + {"type": "number", "airbyte_type": "integer"} + ] + } + }, + "patternProperties": { + "integer_.*": {"type": "number", "airbyte_type": "integer"} + }, + "additionalProperties": {"type": "string"} + } + } + } + """); + } + + @Test + void testDowngradeBooleanSchemas() { + // Most of these should never happen in reality, but let's handle them just in case + // The only ones that we're _really_ expecting are additionalItems and additionalProperties + final String schemaString = """ + { + "type": "object", + "properties": { + "basic_array": { + "items": true + }, + "tuple_array": { + "items": [true], + "additionalItems": true, + "contains": true + }, + "nested_object": { + "properties": { + "id": true, + "nested_oneof": { + "oneOf": [true] + }, + "nested_anyof": { + "anyOf": [true] + }, + "nested_allof": { + "allOf": [true] + }, + "nested_not": { + "not": [true] + } + }, + "patternProperties": { + "integer_.*": true + }, + "additionalProperties": true + } + } + } + """; + doTest(schemaString, schemaString); + } + + @Test + void testDowngradeEmptySchema() { + // Sources shouldn't do this, but we should have handling for it anyway, since it's not currently + // enforced by SATs + final String schemaString = """ + { + "type": "object", + "properties": { + "basic_array": { + "items": {} + }, + "tuple_array": { + "items": [{}], + "additionalItems": {}, + "contains": {} + }, + "nested_object": { + "properties": { + "id": {}, + "nested_oneof": { + "oneOf": [{}] + }, + "nested_anyof": { + "anyOf": [{}] + }, + "nested_allof": { + "allOf": [{}] + }, + "nested_not": { + "not": [{}] + } + }, + "patternProperties": { + "integer_.*": {} + }, + "additionalProperties": {} + } + } + } + """; + doTest(schemaString, schemaString); + } + + @Test + void testDowngradeLiteralSchema() { + // Verify that we do _not_ recurse into places we shouldn't + final String schemaString = """ + { + "type": 
"object", + "properties": { + "example_schema": { + "type": "object", + "default": {"$ref": "WellKnownTypes.json#/definitions/String"}, + "enum": [{"$ref": "WellKnownTypes.json#/definitions/String"}], + "const": {"$ref": "WellKnownTypes.json#/definitions/String"} + } + } + } + """; + doTest(schemaString, schemaString); + } + + @Test + void testDowngradeMultiTypeFields() { + doTest( + """ + { + "type": "object", + "properties": { + "multityped_field": { + "oneOf": [ + {"$ref": "WellKnownTypes.json#/definitions/String"}, + { + "type": "object", + "properties": { + "id": {"$ref": "WellKnownTypes.json#/definitions/String"} + }, + "patternProperties": { + "integer_.*": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + }, + "additionalProperties": {"$ref": "WellKnownTypes.json#/definitions/String"} + }, + { + "type": "array", + "items": {"$ref": "WellKnownTypes.json#/definitions/String"}, + "additionalItems": {"$ref": "WellKnownTypes.json#/definitions/String"}, + "contains": {"$ref": "WellKnownTypes.json#/definitions/String"} + } + ] + }, + "multityped_date_field": { + "oneOf": [ + {"$ref": "WellKnownTypes.json#/definitions/Date"}, + {"$ref": "WellKnownTypes.json#/definitions/Integer"} + ] + }, + "boolean_field": { + "oneOf": [ + true, + {"$ref": "WellKnownTypes.json#/definitions/String"}, + false + ] + }, + "conflicting_field": { + "oneOf": [ + {"type": "object", "properties": {"id": {"$ref": "WellKnownTypes.json#/definitions/String"}}}, + {"type": "object", "properties": {"name": {"$ref": "WellKnownTypes.json#/definitions/String"}}}, + {"$ref": "WellKnownTypes.json#/definitions/String"} + ] + }, + "conflicting_primitives": { + "oneOf": [ + {"$ref": "WellKnownTypes.json#/definitions/TimestampWithoutTimezone"}, + {"$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone"} + ] + } + } + } + """, + """ + { + "type": "object", + "properties": { + "multityped_field": { + "type": ["string", "object", "array"], + "properties": { + "id": {"type": "string"} + }, + 
"patternProperties": { + "integer_.*": {"type": "number", "airbyte_type": "integer"} + }, + "additionalProperties": {"type": "string"}, + "items": {"type": "string"}, + "additionalItems": {"type": "string"}, + "contains": {"type": "string"} + }, + "multityped_date_field": { + "type": ["string", "number"], + "format": "date", + "airbyte_type": "integer" + }, + "boolean_field": { + "oneOf": [ + true, + {"type": "string"}, + false + ] + }, + "conflicting_field": { + "oneOf": [ + {"type": "object", "properties": {"id": {"type": "string"}}}, + {"type": "object", "properties": {"name": {"type": "string"}}}, + {"type": "string"} + ] + }, + "conflicting_primitives": { + "oneOf": [ + {"type": "string", "format": "date-time", "airbyte_type": "timestamp_without_timezone"}, + {"type": "string", "format": "date-time", "airbyte_type": "timestamp_with_timezone"} + ] + } + } + } + """); + } + + @Test + void testDowngradeWeirdSchemas() { + // old_style_schema isn't actually valid (i.e. v1 schemas should always be using $ref) + // but we should check that it behaves well anyway + doTest( + """ + { + "type": "object", + "properties": { + "old_style_schema": {"type": "string"} + } + } + """, + """ + { + "type": "object", + "properties": { + "old_style_schema": {"type": "string"} + } + } + """); + } + + private AirbyteMessage createCatalogMessage(final JsonNode schema) { + return new AirbyteMessage().withType(AirbyteMessage.Type.CATALOG) + .withCatalog( + new AirbyteCatalog().withStreams(List.of(new AirbyteStream().withJsonSchema( + schema)))); + } + + } + + @Nested + class RecordDowngradeTest { + + private static final String STREAM_NAME = "foo_stream"; + private static final String NAMESPACE_NAME = "foo_namespace"; + + @Test + void testBasicDowngrade() { + final ConfiguredAirbyteCatalog catalog = createConfiguredAirbyteCatalog( + """ + {"$ref": "WellKnownTypes.json#/definitions/Integer"} + """); + final JsonNode oldData = Jsons.deserialize( + """ + "42" + """); + + final 
io.airbyte.protocol.models.v0.AirbyteMessage downgradedMessage = new AirbyteMessageMigrationV1(validator) + .downgrade(createRecordMessage(oldData), Optional.of(catalog)); + + final io.airbyte.protocol.models.v0.AirbyteMessage expectedMessage = Jsons.deserialize( + """ + { + "type": "RECORD", + "record": { + "stream": "foo_stream", + "namespace": "foo_namespace", + "data": 42 + } + } + """, + io.airbyte.protocol.models.v0.AirbyteMessage.class); + assertEquals(expectedMessage, downgradedMessage); + } + + @Test + void testNullDowngrade() { + final AirbyteMessage oldMessage = new AirbyteMessage().withType(Type.RECORD); + final io.airbyte.protocol.models.v0.AirbyteMessage upgradedMessage = migration.downgrade(oldMessage, Optional.empty()); + final io.airbyte.protocol.models.v0.AirbyteMessage expectedMessage = new io.airbyte.protocol.models.v0.AirbyteMessage() + .withType(io.airbyte.protocol.models.v0.AirbyteMessage.Type.RECORD); + assertEquals(expectedMessage, upgradedMessage); + } + + /** + * Utility method to use the given catalog to downgrade the oldData, and assert that the result is + * equal to expectedDataString + * + * @param schemaString The JSON schema of the record + * @param oldDataString The data of the record to be downgraded + * @param expectedDataString The expected data after downgrading + */ + private void doTest(final String schemaString, final String oldDataString, final String expectedDataString) { + final ConfiguredAirbyteCatalog catalog = createConfiguredAirbyteCatalog(schemaString); + final JsonNode oldData = Jsons.deserialize(oldDataString); + + final io.airbyte.protocol.models.v0.AirbyteMessage downgradedMessage = new AirbyteMessageMigrationV1(validator) + .downgrade(createRecordMessage(oldData), Optional.of(catalog)); + + final JsonNode expectedDowngradedRecord = Jsons.deserialize(expectedDataString); + assertEquals(expectedDowngradedRecord, downgradedMessage.getRecord().getData()); + } + + @Test + void testNestedDowngrade() { + doTest( + """ 
+ { + "type": "object", + "properties": { + "int": {"$ref": "WellKnownTypes.json#/definitions/Integer"}, + "num": {"$ref": "WellKnownTypes.json#/definitions/Number"}, + "binary": {"$ref": "WellKnownTypes.json#/definitions/BinaryData"}, + "bool": {"$ref": "WellKnownTypes.json#/definitions/Boolean"}, + "object": { + "type": "object", + "properties": { + "int": {"$ref": "WellKnownTypes.json#/definitions/Integer"}, + "arr": { + "type": "array", + "items": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + } + } + }, + "array": { + "type": "array", + "items": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + }, + "array_multitype": { + "type": "array", + "items": [{"$ref": "WellKnownTypes.json#/definitions/Integer"}, {"$ref": "WellKnownTypes.json#/definitions/String"}] + }, + "oneof": { + "type": "array", + "items": { + "oneOf": [ + {"$ref": "WellKnownTypes.json#/definitions/Integer"}, + {"$ref": "WellKnownTypes.json#/definitions/Boolean"} + ] + } + } + } + } + """, + """ + { + "int": "42", + "num": "43.2", + "string": "42", + "bool": true, + "object": { + "int": "42" + }, + "array": ["42"], + "array_multitype": ["42", "42"], + "oneof": ["42", true], + "additionalProperty": "42" + } + """, + """ + { + "int": 42, + "num": 43.2, + "string": "42", + "bool": true, + "object": { + "int": 42 + }, + "array": [42], + "array_multitype": [42, "42"], + "oneof": [42, true], + "additionalProperty": "42" + } + """); + } + + @Test + void testWeirdDowngrade() { + doTest( + """ + { + "type": "object", + "properties": { + "raw_int": {"$ref": "WellKnownTypes.json#/definitions/Integer"}, + "raw_num": {"$ref": "WellKnownTypes.json#/definitions/Number"}, + "bad_int": {"$ref": "WellKnownTypes.json#/definitions/Integer"}, + "typeless_object": { + "properties": { + "foo": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + } + }, + "typeless_array": { + "items": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + }, + "arr_obj_union1": { + "type": ["array", "object"], + "items": 
{ + "type": "object", + "properties": { + "id": {"$ref": "WellKnownTypes.json#/definitions/Integer"}, + "name": {"$ref": "WellKnownTypes.json#/definitions/String"} + } + }, + "properties": { + "id": {"$ref": "WellKnownTypes.json#/definitions/Integer"}, + "name": {"$ref": "WellKnownTypes.json#/definitions/String"} + } + }, + "arr_obj_union2": { + "type": ["array", "object"], + "items": { + "type": "object", + "properties": { + "id": {"$ref": "WellKnownTypes.json#/definitions/Integer"}, + "name": {"$ref": "WellKnownTypes.json#/definitions/String"} + } + }, + "properties": { + "id": {"$ref": "WellKnownTypes.json#/definitions/Integer"}, + "name": {"$ref": "WellKnownTypes.json#/definitions/String"} + } + }, + "empty_oneof": { + "oneOf": [] + } + } + } + """, + """ + { + "raw_int": 42, + "raw_num": 43.2, + "bad_int": "foo", + "typeless_object": { + "foo": "42" + }, + "typeless_array": ["42"], + "arr_obj_union1": [{"id": "42", "name": "arst"}, {"id": "43", "name": "qwfp"}], + "arr_obj_union2": {"id": "42", "name": "arst"}, + "empty_oneof": "42" + } + """, + """ + { + "raw_int": 42, + "raw_num": 43.2, + "bad_int": "foo", + "typeless_object": { + "foo": 42 + }, + "typeless_array": [42], + "arr_obj_union1": [{"id": 42, "name": "arst"}, {"id": 43, "name": "qwfp"}], + "arr_obj_union2": {"id": 42, "name": "arst"}, + "empty_oneof": "42" + } + """); + } + + @Test + void testEmptySchema() { + doTest( + """ + { + "type": "object", + "properties": { + "empty_schema_primitive": {}, + "empty_schema_array": {}, + "empty_schema_object": {}, + "implicit_array": { + "items": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + }, + "implicit_object": { + "properties": { + "foo": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + } + } + } + } + """, + """ + { + "empty_schema_primitive": "42", + "empty_schema_array": ["42", false], + "empty_schema_object": {"foo": "42"}, + "implicit_array": ["42"], + "implicit_object": {"foo": "42"} + } + """, + """ + { + "empty_schema_primitive": 
"42", + "empty_schema_array": ["42", false], + "empty_schema_object": {"foo": "42"}, + "implicit_array": [42], + "implicit_object": {"foo": 42} + } + """); + } + + @Test + void testBacktracking() { + // These test cases verify that we correctly choose the most-correct oneOf option. + doTest( + """ + { + "type": "object", + "properties": { + "valid_option": { + "oneOf": [ + {"$ref": "WellKnownTypes.json#/definitions/Boolean"}, + {"$ref": "WellKnownTypes.json#/definitions/Integer"}, + {"$ref": "WellKnownTypes.json#/definitions/String"} + ] + }, + "all_invalid": { + "oneOf": [ + { + "type": "array", + "items": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + }, + { + "type": "array", + "items": {"$ref": "WellKnownTypes.json#/definitions/Boolean"} + } + ] + }, + "nested_oneof": { + "oneOf": [ + { + "type": "array", + "items": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + }, + { + "type": "array", + "items": { + "type": "object", + "properties": { + "foo": { + "oneOf": [ + {"$ref": "WellKnownTypes.json#/definitions/Boolean"}, + {"$ref": "WellKnownTypes.json#/definitions/Integer"} + ] + } + } + } + } + ] + }, + "mismatched_primitive": { + "oneOf": [ + { + "type": "object", + "properties": { + "foo": {"type": "object"}, + "bar": {"$ref": "WellKnownTypes.json#/definitions/String"} + } + }, + { + "type": "object", + "properties": { + "foo": {"$ref": "WellKnownTypes.json#/definitions/Boolean"}, + "bar": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + } + } + ] + }, + "mismatched_text": { + "oneOf": [ + { + "type": "object", + "properties": { + "foo": {"type": "object"}, + "bar": {"$ref": "WellKnownTypes.json#/definitions/String"} + } + }, + { + "type": "object", + "properties": { + "foo": {"$ref": "WellKnownTypes.json#/definitions/String"}, + "bar": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + } + } + ] + }, + "mismatch_array": { + "oneOf": [ + { + "type": "array", + "items": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + }, + { + 
"type": "array", + "items": [ + {"$ref": "WellKnownTypes.json#/definitions/String"}, + {"$ref": "WellKnownTypes.json#/definitions/String"}, + {"$ref": "WellKnownTypes.json#/definitions/Integer"} + ] + } + ] + } + } + } + """, + """ + { + "valid_option": "42", + "all_invalid": ["42", "arst"], + "nested_oneof": [{"foo": "42"}], + "mismatched_primitive": { + "foo": true, + "bar": "42" + }, + "mismatched_text": { + "foo": "bar", + "bar": "42" + }, + "mismatch_array": ["arst", "41", "42"] + } + """, + """ + { + "valid_option": 42, + "all_invalid": [42, "arst"], + "nested_oneof": [{"foo": 42}], + "mismatched_primitive": { + "foo": true, + "bar": 42 + }, + "mismatched_text": { + "foo": "bar", + "bar": 42 + }, + "mismatch_array": ["arst", "41", 42] + } + """); + } + + @Test + void testIncorrectSchema() { + doTest( + """ + { + "type": "object", + "properties": { + "bad_int": {"$ref": "WellKnownTypes.json#/definitions/Integer"}, + "bad_int_array": { + "type": "array", + "items": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + }, + "bad_int_obj": { + "type": "object", + "properties": { + "foo": {"$ref": "WellKnownTypes.json#/definitions/Integer"} + } + } + } + } + """, + """ + { + "bad_int": "arst", + "bad_int_array": ["arst"], + "bad_int_obj": {"foo": "arst"} + } + """, + """ + { + "bad_int": "arst", + "bad_int_array": ["arst"], + "bad_int_obj": {"foo": "arst"} + } + """); + } + + private ConfiguredAirbyteCatalog createConfiguredAirbyteCatalog(final String schema) { + return new ConfiguredAirbyteCatalog() + .withStreams(List.of(new ConfiguredAirbyteStream().withStream(new io.airbyte.protocol.models.AirbyteStream() + .withName(STREAM_NAME) + .withNamespace(NAMESPACE_NAME) + .withJsonSchema(Jsons.deserialize(schema))))); + } + + private AirbyteMessage createRecordMessage(final JsonNode data) { + return new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage().withStream(STREAM_NAME).withNamespace(NAMESPACE_NAME).withData(data)); 
+ } + + } + +} diff --git a/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/migrations/v1/ConfiguredAirbyteCatalogMigrationV1Test.java b/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/migrations/v1/ConfiguredAirbyteCatalogMigrationV1Test.java new file mode 100644 index 0000000000000..d9753def6d2a2 --- /dev/null +++ b/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/migrations/v1/ConfiguredAirbyteCatalogMigrationV1Test.java @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.commons.protocol.migrations.v1; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import java.util.List; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * These migrations depend on the same {@link SchemaMigrationV1} class as + * {@link io.airbyte.commons.protocol.migrations.v1.AirbyteMessageMigrationV1}, so only a few + * basic tests are written here. + * + * See the tests for {@link AirbyteMessageMigrationV1} for more comprehensive coverage. + * Theoretically SchemaMigrationV1 should have its own set of tests, but for various + * (development history-related) reasons, that would be a lot of work.
+ */ +class ConfiguredAirbyteCatalogMigrationV1Test { + + private ConfiguredAirbyteCatalogMigrationV1 migration; + + @BeforeEach + void setup() { + migration = new ConfiguredAirbyteCatalogMigrationV1(); + } + + @Test + void testVersionMetadata() { + assertEquals("0.3.0", migration.getPreviousVersion().serialize()); + assertEquals("1.0.0", migration.getCurrentVersion().serialize()); + } + + @Test + void testBasicUpgrade() { + // This isn't actually a valid stream schema (since it's not an object) + // but this test case is mostly about preserving the message structure, so it's not super relevant + final io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog downgradedCatalog = new io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog() + .withStreams(List.of( + new io.airbyte.protocol.models.v0.ConfiguredAirbyteStream().withStream(new io.airbyte.protocol.models.v0.AirbyteStream().withJsonSchema( + Jsons.deserialize( + """ + { + "type": "string" + } + """))))); + + final ConfiguredAirbyteCatalog upgradedMessage = migration.upgrade(downgradedCatalog); + + final ConfiguredAirbyteCatalog expectedMessage = Jsons.deserialize( + """ + { + "streams": [ + { + "stream": { + "json_schema": { + "$ref": "WellKnownTypes.json#/definitions/String" + } + } + } + ] + } + """, + ConfiguredAirbyteCatalog.class); + assertEquals(expectedMessage, upgradedMessage); + } + + @Test + void testBasicDowngrade() { + // This isn't actually a valid stream schema (since it's not an object) + // but this test case is mostly about preserving the message structure, so it's not super relevant + final ConfiguredAirbyteCatalog upgradedCatalog = new ConfiguredAirbyteCatalog() + .withStreams(List.of( + new ConfiguredAirbyteStream().withStream(new AirbyteStream().withJsonSchema( + Jsons.deserialize(""" + { + "$ref": "WellKnownTypes.json#/definitions/String" + } + """))))); + + final io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog downgradedMessage = migration.downgrade(upgradedCatalog); + + final 
io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog expectedMessage = Jsons.deserialize( + """ + { + "streams": [ + { + "stream": { + "json_schema": { + "type": "string" + } + } + } + ] + } + """, + io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog.class); + assertEquals(expectedMessage, downgradedMessage); + } + +} diff --git a/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1SerDeTest.java b/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1SerDeTest.java new file mode 100644 index 0000000000000..27f704fef9a6b --- /dev/null +++ b/airbyte-commons-protocol/src/test/java/io/airbyte/commons/protocol/serde/AirbyteMessageV1SerDeTest.java @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.commons.protocol.serde; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteMessage.Type; +import io.airbyte.protocol.models.ConnectorSpecification; +import java.net.URI; +import java.net.URISyntaxException; +import org.junit.jupiter.api.Test; + +class AirbyteMessageV1SerDeTest { + + @Test + void v1SerDeRoundTripTest() throws URISyntaxException { + final AirbyteMessageV1Deserializer deser = new AirbyteMessageV1Deserializer(); + final AirbyteMessageV1Serializer ser = new AirbyteMessageV1Serializer(); + + final AirbyteMessage message = new AirbyteMessage() + .withType(Type.SPEC) + .withSpec( + new ConnectorSpecification() + .withProtocolVersion("1.0.0") + .withDocumentationUrl(new URI("file:///tmp/doc"))); + + final String serializedMessage = ser.serialize(message); + final AirbyteMessage deserializedMessage = deser.deserialize(Jsons.deserialize(serializedMessage)); + + assertEquals(message, deserializedMessage); + } + +} diff --git a/airbyte-commons-protocol/src/test/resources/WellKnownTypes.json 
b/airbyte-commons-protocol/src/test/resources/WellKnownTypes.json new file mode 100644 index 0000000000000..95d2ff9e26fad --- /dev/null +++ b/airbyte-commons-protocol/src/test/resources/WellKnownTypes.json @@ -0,0 +1,65 @@ +{ + "definitions": { + "String": { + "type": "string", + "description": "Arbitrary text" + }, + "BinaryData": { + "type": "string", + "description": "Arbitrary binary data. Represented as base64-encoded strings in the JSON transport. In the future, if we support other transports, may be encoded differently.\n", + "pattern": "^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$" + }, + "Date": { + "type": "string", + "pattern": "^\\d{4}-\\d{2}-\\d{2}( BC)?$", + "description": "RFC 3339\u00a75.6's full-date format, extended with BC era support" + }, + "TimestampWithTimezone": { + "type": "string", + "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?(Z|[+\\-]\\d{1,2}:\\d{2})( BC)?$", + "description": "An instant in time. Frequently simply referred to as just a timestamp, or timestamptz. Uses RFC 3339\u00a75.6's date-time format, requiring a \"T\" separator, and extended with BC era support. Note that we do _not_ accept Unix epochs here.\n" + }, + "TimestampWithoutTimezone": { + "type": "string", + "pattern": "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?( BC)?$", + "description": "Also known as a localdatetime, or just datetime. Under RFC 3339\u00a75.6, this would be represented as `full-date \"T\" partial-time`, extended with BC era support.\n" + }, + "TimeWithTimezone": { + "type": "string", + "pattern": "^\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?(Z|[+\\-]\\d{1,2}:\\d{2})$", + "description": "An RFC 3339\u00a75.6 full-time" + }, + "TimeWithoutTimezone": { + "type": "string", + "pattern": "^\\d{2}:\\d{2}:\\d{2}(\\.\\d+)?$", + "description": "An RFC 3339\u00a75.6 partial-time" + }, + "Number": { + "type": "string", + "oneOf": [ + { + "pattern": "-?(0|[0-9]\\d*)(\\.\\d+)?" 
+ }, + { + "enum": ["Infinity", "-Infinity", "NaN"] + } + ], + "description": "Note the mix of regex validation for normal numbers, and enum validation for special values." + }, + "Integer": { + "type": "string", + "oneOf": [ + { + "pattern": "-?(0|[0-9]\\d*)" + }, + { + "enum": ["Infinity", "-Infinity", "NaN"] + } + ] + }, + "Boolean": { + "type": "boolean", + "description": "Note the direct usage of a primitive boolean rather than string. Unlike Numbers and Integers, we don't expect unusual values here." + } + } +} diff --git a/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactoryTest.java b/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactoryTest.java index b6b275548f241..48945711804f3 100644 --- a/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactoryTest.java +++ b/airbyte-commons-worker/src/test/java/io/airbyte/workers/internal/VersionedAirbyteStreamFactoryTest.java @@ -12,8 +12,8 @@ import io.airbyte.commons.protocol.AirbyteMessageSerDeProvider; import io.airbyte.commons.protocol.AirbyteProtocolVersionedMigratorFactory; import io.airbyte.commons.protocol.ConfiguredAirbyteCatalogMigrator; -import io.airbyte.commons.protocol.migrations.AirbyteMessageMigrationV1; -import io.airbyte.commons.protocol.migrations.ConfiguredAirbyteCatalogMigrationV1; +import io.airbyte.commons.protocol.migrations.v1.AirbyteMessageMigrationV1; +import io.airbyte.commons.protocol.migrations.v1.ConfiguredAirbyteCatalogMigrationV1; import io.airbyte.commons.protocol.serde.AirbyteMessageV0Deserializer; import io.airbyte.commons.protocol.serde.AirbyteMessageV0Serializer; import io.airbyte.commons.protocol.serde.AirbyteMessageV1Deserializer; diff --git a/airbyte-commons-worker/src/test/resources/version-detection/logs-without-spec-message.jsonl b/airbyte-commons-worker/src/test/resources/version-detection/logs-without-spec-message.jsonl index 
267d601909253..3eee647a930e1 100644 --- a/airbyte-commons-worker/src/test/resources/version-detection/logs-without-spec-message.jsonl +++ b/airbyte-commons-worker/src/test/resources/version-detection/logs-without-spec-message.jsonl @@ -2,7 +2,7 @@ {"type":"LOG","log":{"level":"INFO","message":"Running integration: io.airbyte.integrations.destination.bigquery.BigQueryDestination"}} {"type":"LOG","log":{"level":"INFO","message":"Command: SPEC"}} {"type":"LOG","log":{"level":"INFO","message":"Integration config: IntegrationConfig{command=SPEC, configPath='null', catalogPath='null', statePath='null'}"}} -{"type":"RECORD","stream":"s","emitted_at":5,"data":{"protocol_version":"0.5.9","documentationUrl":"https://docs.airbyte.io/integrations/destinations/bigquery","connectionSpecification":{"$schema":"http://json-schema.org/draft-07/schema#","title":"BigQuery Destination Spec","type":"object","required":["project_id","dataset_location","dataset_id"],"additionalProperties":true,"properties":{"project_id":{"type":"string","description":"The GCP project ID for the project containing the target BigQuery dataset. Read more here.","title":"Project ID","order":0},"dataset_location":{"type":"string","description":"The location of the dataset. Warning: Changes made after creation will not be applied. 
Read more here.","title":"Dataset Location","order":1,"enum":["US","EU","asia-east1","asia-east2","asia-northeast1","asia-northeast2","asia-northeast3","asia-south1","asia-south2","asia-southeast1","asia-southeast2","australia-southeast1","australia-southeast2","europe-central2","europe-north1","europe-west1","europe-west2","europe-west3","europe-west4","europe-west6","northamerica-northeast1","northamerica-northeast2","southamerica-east1","southamerica-west1","us-central1","us-east1","us-east4","us-west1","us-west2","us-west3","us-west4"]},"dataset_id":{"type":"string","description":"The default BigQuery Dataset ID that tables are replicated to if the source does not specify a namespace. Read more here.","title":"Default Dataset ID","order":2},"loading_method":{"type":"object","title":"Loading Method","description":"Loading method used to send select the way data will be uploaded to BigQuery.
Standard Inserts - Direct uploading using SQL INSERT statements. This method is extremely inefficient and provided only for quick testing. In almost all cases, you should use staging.
GCS Staging - Writes large batches of records to a file, uploads the file to GCS, then uses COPY INTO table to upload the file. Recommended for most workloads for better speed and scalability. Read more about GCS Staging here.","order":3,"oneOf":[{"title":"Standard Inserts","required":["method"],"properties":{"method":{"type":"string","const":"Standard"}}},{"title":"GCS Staging","required":["method","gcs_bucket_name","gcs_bucket_path","credential"],"properties":{"method":{"type":"string","const":"GCS Staging","order":0},"credential":{"title":"Credential","description":"An HMAC key is a type of credential and can be associated with a service account or a user account in Cloud Storage. Read more here.","type":"object","order":1,"oneOf":[{"title":"HMAC key","required":["credential_type","hmac_key_access_id","hmac_key_secret"],"properties":{"credential_type":{"type":"string","const":"HMAC_KEY","order":0},"hmac_key_access_id":"**********","type":"string","description":"HMAC key access ID. When linked to a service account, this ID is 61 characters long; when linked to a user account, it is 24 characters long.","title":"HMAC Key Access ID","airbyte_secret":true,"examples":["1234567890abcdefghij1234"],"order":1},"hmac_key_secret":"**********","type":"string","description":"The corresponding secret for the access ID. It is a 40-character base-64 encoded string.","title":"HMAC Key Secret","airbyte_secret":true,"examples":["1234567890abcdefghij1234567890ABCDEFGHIJ"],"order":2}]},"gcs_bucket_name":{"title":"GCS Bucket Name","type":"string","description":"The name of the GCS bucket. Read more here.","examples":["airbyte_sync"],"order":2},"gcs_bucket_path":{"title":"GCS Bucket Path","description":"Directory under the GCS bucket where data will be written.","type":"string","examples":["data_sync/test"],"order":3},"keep_files_in_gcs-bucket":{"type":"string","description":"This upload method is supposed to temporary store records in GCS bucket. 
By this select you can chose if these records should be removed from GCS when migration has finished. The default \"Delete all tmp files from GCS\" value is used if not set explicitly.","title":"GCS Tmp Files Afterward Processing (Optional)","default":"Delete all tmp files from GCS","enum":["Delete all tmp files from GCS","Keep all tmp files in GCS"],"order":4}}}]},"credentials_json":"**********","type":"string","description":"The contents of the JSON service account key. Check out the docs if you need help generating this key. Default credentials will be used if this field is left empty.","title":"Service Account Key JSON (Required for cloud, optional for open-source)","airbyte_secret":true,"order":4},"transformation_priority":{"type":"string","description":"Interactive run type means that the query is executed as soon as possible, and these queries count towards concurrent rate limit and daily limit. Read more about interactive run type here. Batch queries are queued and started as soon as idle resources are available in the BigQuery shared resource pool, which usually occurs within a few minutes. Batch queries don’t count towards your concurrent rate limit. Read more about batch queries here. The default \"interactive\" value is used if not set explicitly.","title":"Transformation Query Run Type (Optional)","default":"interactive","enum":["interactive","batch"],"order":5},"big_query_client_buffer_size_mb":{"title":"Google BigQuery Client Chunk Size (Optional)","description":"Google BigQuery client's chunk (buffer) size (MIN=1, MAX = 15) for each table. The size that will be written by a single RPC. Written data will be buffered and only flushed upon reaching this size or closing the channel. The default 15MB value is used if not set explicitly. 
Read more here.","type":"integer","minimum":1,"maximum":15,"default":15,"examples":["15"],"order":6}}},"supportsIncremental":true,"supportsNormalization":true,"supportsDBT":true,"supported_destination_sync_modes":["overwrite","append","append_dedup"]} +{"type":"RECORD","record":{"stream":"s","emitted_at":5,"data":{"protocol_version":"0.5.9","documentationUrl":"https://docs.airbyte.io/integrations/destinations/bigquery","connectionSpecification":{"$schema":"http://json-schema.org/draft-07/schema#","title":"BigQuery Destination Spec","type":"object","required":["project_id","dataset_location","dataset_id"],"additionalProperties":true,"properties":{"project_id":{"type":"string","description":"The GCP project ID for the project containing the target BigQuery dataset. Read more here.","title":"Project ID","order":0},"dataset_location":{"type":"string","description":"The location of the dataset. Warning: Changes made after creation will not be applied. Read more here.","title":"Dataset Location","order":1,"enum":["US","EU","asia-east1","asia-east2","asia-northeast1","asia-northeast2","asia-northeast3","asia-south1","asia-south2","asia-southeast1","asia-southeast2","australia-southeast1","australia-southeast2","europe-central2","europe-north1","europe-west1","europe-west2","europe-west3","europe-west4","europe-west6","northamerica-northeast1","northamerica-northeast2","southamerica-east1","southamerica-west1","us-central1","us-east1","us-east4","us-west1","us-west2","us-west3","us-west4"]},"dataset_id":{"type":"string","description":"The default BigQuery Dataset ID that tables are replicated to if the source does not specify a namespace. Read more here.","title":"Default Dataset ID","order":2},"loading_method":{"type":"object","title":"Loading Method","description":"Loading method used to send select the way data will be uploaded to BigQuery.
Standard Inserts - Direct uploading using SQL INSERT statements. This method is extremely inefficient and provided only for quick testing. In almost all cases, you should use staging.
GCS Staging - Writes large batches of records to a file, uploads the file to GCS, then uses COPY INTO table to upload the file. Recommended for most workloads for better speed and scalability. Read more about GCS Staging here.","order":3,"oneOf":[{"title":"Standard Inserts","required":["method"],"properties":{"method":{"type":"string","const":"Standard"}}},{"title":"GCS Staging","required":["method","gcs_bucket_name","gcs_bucket_path","credential"],"properties":{"method":{"type":"string","const":"GCS Staging","order":0},"credential":{"title":"Credential","description":"An HMAC key is a type of credential and can be associated with a service account or a user account in Cloud Storage. Read more here.","type":"object","order":1,"oneOf":[{"title":"HMAC key","required":["credential_type","hmac_key_access_id","hmac_key_secret"],"properties":{"credential_type":{"type":"string","const":"HMAC_KEY","order":0},"hmac_key_access_id":"**********","type":"string","description":"HMAC key access ID. When linked to a service account, this ID is 61 characters long; when linked to a user account, it is 24 characters long.","title":"HMAC Key Access ID","airbyte_secret":true,"examples":["1234567890abcdefghij1234"],"order":1},"hmac_key_secret":"**********","type":"string","description":"The corresponding secret for the access ID. It is a 40-character base-64 encoded string.","title":"HMAC Key Secret","airbyte_secret":true,"examples":["1234567890abcdefghij1234567890ABCDEFGHIJ"],"order":2}]},"gcs_bucket_name":{"title":"GCS Bucket Name","type":"string","description":"The name of the GCS bucket. Read more here.","examples":["airbyte_sync"],"order":2},"gcs_bucket_path":{"title":"GCS Bucket Path","description":"Directory under the GCS bucket where data will be written.","type":"string","examples":["data_sync/test"],"order":3},"keep_files_in_gcs-bucket":{"type":"string","description":"This upload method is supposed to temporary store records in GCS bucket. 
By this select you can chose if these records should be removed from GCS when migration has finished. The default \"Delete all tmp files from GCS\" value is used if not set explicitly.","title":"GCS Tmp Files Afterward Processing (Optional)","default":"Delete all tmp files from GCS","enum":["Delete all tmp files from GCS","Keep all tmp files in GCS"],"order":4}}}]},"credentials_json":"**********","type":"string","description":"The contents of the JSON service account key. Check out the docs if you need help generating this key. Default credentials will be used if this field is left empty.","title":"Service Account Key JSON (Required for cloud, optional for open-source)","airbyte_secret":true,"order":4},"transformation_priority":{"type":"string","description":"Interactive run type means that the query is executed as soon as possible, and these queries count towards concurrent rate limit and daily limit. Read more about interactive run type here. Batch queries are queued and started as soon as idle resources are available in the BigQuery shared resource pool, which usually occurs within a few minutes. Batch queries don’t count towards your concurrent rate limit. Read more about batch queries here. The default \"interactive\" value is used if not set explicitly.","title":"Transformation Query Run Type (Optional)","default":"interactive","enum":["interactive","batch"],"order":5},"big_query_client_buffer_size_mb":{"title":"Google BigQuery Client Chunk Size (Optional)","description":"Google BigQuery client's chunk (buffer) size (MIN=1, MAX = 15) for each table. The size that will be written by a single RPC. Written data will be buffered and only flushed upon reaching this size or closing the channel. The default 15MB value is used if not set explicitly. 
Read more here.","type":"integer","minimum":1,"maximum":15,"default":15,"examples":["15"],"order":6}}},"supportsIncremental":true,"supportsNormalization":true,"supportsDBT":true,"supported_destination_sync_modes":["overwrite","append","append_dedup"]}} {"type":"LOG","log":{"level":"INFO","message":"integration args: {spec=null}"}} {"type":"LOG","log":{"level":"INFO","message":"Running integration: io.airbyte.integrations.destination.bigquery.BigQueryDestination"}} {"type":"LOG","log":{"level":"INFO","message":"Command: SPEC"}} @@ -15,4 +15,4 @@ {"type":"LOG","log":{"level":"INFO","message":"Running integration: io.airbyte.integrations.destination.bigquery.BigQueryDestination"}} {"type":"LOG","log":{"level":"INFO","message":"Command: SPEC"}} {"type":"LOG","log":{"level":"INFO","message":"Integration config: IntegrationConfig{command=SPEC, configPath='null', catalogPath='null', statePath='null'}"}} -{"type":"RECORD","stream":"s","emitted_at":6,"data":{"protocol_version":"0.5.9","documentationUrl":"https://docs.airbyte.io/integrations/destinations/bigquery","connectionSpecification":{"$schema":"http://json-schema.org/draft-07/schema#","title":"BigQuery Destination Spec","type":"object","required":["project_id","dataset_location","dataset_id"],"additionalProperties":true,"properties":{"project_id":{"type":"string","description":"The GCP project ID for the project containing the target BigQuery dataset. Read more here.","title":"Project ID","order":0},"dataset_location":{"type":"string","description":"The location of the dataset. Warning: Changes made after creation will not be applied. 
Read more here.","title":"Dataset Location","order":1,"enum":["US","EU","asia-east1","asia-east2","asia-northeast1","asia-northeast2","asia-northeast3","asia-south1","asia-south2","asia-southeast1","asia-southeast2","australia-southeast1","australia-southeast2","europe-central2","europe-north1","europe-west1","europe-west2","europe-west3","europe-west4","europe-west6","northamerica-northeast1","northamerica-northeast2","southamerica-east1","southamerica-west1","us-central1","us-east1","us-east4","us-west1","us-west2","us-west3","us-west4"]},"dataset_id":{"type":"string","description":"The default BigQuery Dataset ID that tables are replicated to if the source does not specify a namespace. Read more here.","title":"Default Dataset ID","order":2},"loading_method":{"type":"object","title":"Loading Method","description":"Loading method used to send select the way data will be uploaded to BigQuery.
Standard Inserts - Direct uploading using SQL INSERT statements. This method is extremely inefficient and provided only for quick testing. In almost all cases, you should use staging.
GCS Staging - Writes large batches of records to a file, uploads the file to GCS, then uses COPY INTO table to upload the file. Recommended for most workloads for better speed and scalability. Read more about GCS Staging here.","order":3,"oneOf":[{"title":"Standard Inserts","required":["method"],"properties":{"method":{"type":"string","const":"Standard"}}},{"title":"GCS Staging","required":["method","gcs_bucket_name","gcs_bucket_path","credential"],"properties":{"method":{"type":"string","const":"GCS Staging","order":0},"credential":{"title":"Credential","description":"An HMAC key is a type of credential and can be associated with a service account or a user account in Cloud Storage. Read more here.","type":"object","order":1,"oneOf":[{"title":"HMAC key","required":["credential_type","hmac_key_access_id","hmac_key_secret"],"properties":{"credential_type":{"type":"string","const":"HMAC_KEY","order":0},"hmac_key_access_id":"**********","type":"string","description":"HMAC key access ID. When linked to a service account, this ID is 61 characters long; when linked to a user account, it is 24 characters long.","title":"HMAC Key Access ID","airbyte_secret":true,"examples":["1234567890abcdefghij1234"],"order":1},"hmac_key_secret":"**********","type":"string","description":"The corresponding secret for the access ID. It is a 40-character base-64 encoded string.","title":"HMAC Key Secret","airbyte_secret":true,"examples":["1234567890abcdefghij1234567890ABCDEFGHIJ"],"order":2}]},"gcs_bucket_name":{"title":"GCS Bucket Name","type":"string","description":"The name of the GCS bucket. Read more here.","examples":["airbyte_sync"],"order":2},"gcs_bucket_path":{"title":"GCS Bucket Path","description":"Directory under the GCS bucket where data will be written.","type":"string","examples":["data_sync/test"],"order":3},"keep_files_in_gcs-bucket":{"type":"string","description":"This upload method is supposed to temporary store records in GCS bucket. 
By this select you can chose if these records should be removed from GCS when migration has finished. The default \"Delete all tmp files from GCS\" value is used if not set explicitly.","title":"GCS Tmp Files Afterward Processing (Optional)","default":"Delete all tmp files from GCS","enum":["Delete all tmp files from GCS","Keep all tmp files in GCS"],"order":4}}}]},"credentials_json":"**********","type":"string","description":"The contents of the JSON service account key. Check out the docs if you need help generating this key. Default credentials will be used if this field is left empty.","title":"Service Account Key JSON (Required for cloud, optional for open-source)","airbyte_secret":true,"order":4},"transformation_priority":{"type":"string","description":"Interactive run type means that the query is executed as soon as possible, and these queries count towards concurrent rate limit and daily limit. Read more about interactive run type here. Batch queries are queued and started as soon as idle resources are available in the BigQuery shared resource pool, which usually occurs within a few minutes. Batch queries don’t count towards your concurrent rate limit. Read more about batch queries here. The default \"interactive\" value is used if not set explicitly.","title":"Transformation Query Run Type (Optional)","default":"interactive","enum":["interactive","batch"],"order":5},"big_query_client_buffer_size_mb":{"title":"Google BigQuery Client Chunk Size (Optional)","description":"Google BigQuery client's chunk (buffer) size (MIN=1, MAX = 15) for each table. The size that will be written by a single RPC. Written data will be buffered and only flushed upon reaching this size or closing the channel. The default 15MB value is used if not set explicitly. 
Read more here.","type":"integer","minimum":1,"maximum":15,"default":15,"examples":["15"],"order":6}}},"supportsIncremental":true,"supportsNormalization":true,"supportsDBT":true,"supported_destination_sync_modes":["overwrite","append","append_dedup"]} +{"type":"RECORD","record":{"stream":"s","emitted_at":6,"data":{"protocol_version":"0.5.9","documentationUrl":"https://docs.airbyte.io/integrations/destinations/bigquery","connectionSpecification":{"$schema":"http://json-schema.org/draft-07/schema#","title":"BigQuery Destination Spec","type":"object","required":["project_id","dataset_location","dataset_id"],"additionalProperties":true,"properties":{"project_id":{"type":"string","description":"The GCP project ID for the project containing the target BigQuery dataset. Read more here.","title":"Project ID","order":0},"dataset_location":{"type":"string","description":"The location of the dataset. Warning: Changes made after creation will not be applied. Read more here.","title":"Dataset Location","order":1,"enum":["US","EU","asia-east1","asia-east2","asia-northeast1","asia-northeast2","asia-northeast3","asia-south1","asia-south2","asia-southeast1","asia-southeast2","australia-southeast1","australia-southeast2","europe-central2","europe-north1","europe-west1","europe-west2","europe-west3","europe-west4","europe-west6","northamerica-northeast1","northamerica-northeast2","southamerica-east1","southamerica-west1","us-central1","us-east1","us-east4","us-west1","us-west2","us-west3","us-west4"]},"dataset_id":{"type":"string","description":"The default BigQuery Dataset ID that tables are replicated to if the source does not specify a namespace. Read more here.","title":"Default Dataset ID","order":2},"loading_method":{"type":"object","title":"Loading Method","description":"Loading method used to send select the way data will be uploaded to BigQuery.
Standard Inserts - Direct uploading using SQL INSERT statements. This method is extremely inefficient and provided only for quick testing. In almost all cases, you should use staging.
GCS Staging - Writes large batches of records to a file, uploads the file to GCS, then uses COPY INTO table to upload the file. Recommended for most workloads for better speed and scalability. Read more about GCS Staging here.","order":3,"oneOf":[{"title":"Standard Inserts","required":["method"],"properties":{"method":{"type":"string","const":"Standard"}}},{"title":"GCS Staging","required":["method","gcs_bucket_name","gcs_bucket_path","credential"],"properties":{"method":{"type":"string","const":"GCS Staging","order":0},"credential":{"title":"Credential","description":"An HMAC key is a type of credential and can be associated with a service account or a user account in Cloud Storage. Read more here.","type":"object","order":1,"oneOf":[{"title":"HMAC key","required":["credential_type","hmac_key_access_id","hmac_key_secret"],"properties":{"credential_type":{"type":"string","const":"HMAC_KEY","order":0},"hmac_key_access_id":"**********","type":"string","description":"HMAC key access ID. When linked to a service account, this ID is 61 characters long; when linked to a user account, it is 24 characters long.","title":"HMAC Key Access ID","airbyte_secret":true,"examples":["1234567890abcdefghij1234"],"order":1},"hmac_key_secret":"**********","type":"string","description":"The corresponding secret for the access ID. It is a 40-character base-64 encoded string.","title":"HMAC Key Secret","airbyte_secret":true,"examples":["1234567890abcdefghij1234567890ABCDEFGHIJ"],"order":2}]},"gcs_bucket_name":{"title":"GCS Bucket Name","type":"string","description":"The name of the GCS bucket. Read more here.","examples":["airbyte_sync"],"order":2},"gcs_bucket_path":{"title":"GCS Bucket Path","description":"Directory under the GCS bucket where data will be written.","type":"string","examples":["data_sync/test"],"order":3},"keep_files_in_gcs-bucket":{"type":"string","description":"This upload method is supposed to temporary store records in GCS bucket. 
By this select you can chose if these records should be removed from GCS when migration has finished. The default \"Delete all tmp files from GCS\" value is used if not set explicitly.","title":"GCS Tmp Files Afterward Processing (Optional)","default":"Delete all tmp files from GCS","enum":["Delete all tmp files from GCS","Keep all tmp files in GCS"],"order":4}}}]},"credentials_json":"**********","type":"string","description":"The contents of the JSON service account key. Check out the docs if you need help generating this key. Default credentials will be used if this field is left empty.","title":"Service Account Key JSON (Required for cloud, optional for open-source)","airbyte_secret":true,"order":4},"transformation_priority":{"type":"string","description":"Interactive run type means that the query is executed as soon as possible, and these queries count towards concurrent rate limit and daily limit. Read more about interactive run type here. Batch queries are queued and started as soon as idle resources are available in the BigQuery shared resource pool, which usually occurs within a few minutes. Batch queries don’t count towards your concurrent rate limit. Read more about batch queries here. The default \"interactive\" value is used if not set explicitly.","title":"Transformation Query Run Type (Optional)","default":"interactive","enum":["interactive","batch"],"order":5},"big_query_client_buffer_size_mb":{"title":"Google BigQuery Client Chunk Size (Optional)","description":"Google BigQuery client's chunk (buffer) size (MIN=1, MAX = 15) for each table. The size that will be written by a single RPC. Written data will be buffered and only flushed upon reaching this size or closing the channel. The default 15MB value is used if not set explicitly. 
Read more here.","type":"integer","minimum":1,"maximum":15,"default":15,"examples":["15"],"order":6}}},"supportsIncremental":true,"supportsNormalization":true,"supportsDBT":true,"supported_destination_sync_modes":["overwrite","append","append_dedup"]}} diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIterator.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIterator.java index 8dc663709a432..42bf0b3388975 100644 --- a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIterator.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/StateDecoratingIterator.java @@ -149,7 +149,7 @@ protected AirbyteMessage computeNext() { } catch (final Exception e) { emitIntermediateState = true; hasCaughtException = true; - LOGGER.error("Message iterator failed to read next record. 
{}", e.getMessage()); + LOGGER.error("Message iterator failed to read next record.", e); optionalIntermediateMessage = getIntermediateMessage(); return optionalIntermediateMessage.orElse(endOfData()); } diff --git a/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java b/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java index 5c9507bee977f..5ff1e2008ce80 100644 --- a/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java +++ b/airbyte-json-validation/src/main/java/io/airbyte/validation/json/JsonSchemaValidator.java @@ -37,7 +37,7 @@ public class JsonSchemaValidator { static { try { DEFAULT_BASE_URI = new URI("file:///app/nonexistent_file.json"); - } catch (URISyntaxException e) { + } catch (final URISyntaxException e) { throw new RuntimeException(e); } } @@ -58,7 +58,7 @@ public JsonSchemaValidator() { * @param baseUri The base URI for schema resolution */ @VisibleForTesting - protected JsonSchemaValidator(URI baseUri) { + public JsonSchemaValidator(final URI baseUri) { this.jsonSchemaFactory = JsonSchemaFactory.getInstance(SpecVersion.VersionFlag.V7); this.baseUri = baseUri; } @@ -90,12 +90,12 @@ private Set validateInternal(final JsonNode schemaJson, final Preconditions.checkNotNull(objectJson); // Default to draft-07, but have handling for the other metaschemas that networknt supports - JsonMetaSchema metaschema; - JsonNode metaschemaNode = schemaJson.get("$schema"); + final JsonMetaSchema metaschema; + final JsonNode metaschemaNode = schemaJson.get("$schema"); if (metaschemaNode == null || metaschemaNode.asText() == null || metaschemaNode.asText().isEmpty()) { metaschema = JsonMetaSchema.getV7(); } else { - String metaschemaString = metaschemaNode.asText(); + final String metaschemaString = metaschemaNode.asText(); // We're not using "http://....".equals(), because we want to avoid weirdness with https, etc. 
if (metaschemaString.contains("json-schema.org/draft-04")) { metaschema = JsonMetaSchema.getV4(); @@ -110,13 +110,13 @@ private Set validateInternal(final JsonNode schemaJson, final } } - ValidationContext context = new ValidationContext( + final ValidationContext context = new ValidationContext( jsonSchemaFactory.getUriFactory(), null, metaschema, jsonSchemaFactory, null); - JsonSchema schema = new JsonSchema( + final JsonSchema schema = new JsonSchema( context, baseUri, schemaJson); diff --git a/airbyte-protocol/protocol-models/src/main/java/io/airbyte/protocol/models/JsonSchemaPrimitiveUtil.java b/airbyte-protocol/protocol-models/src/main/java/io/airbyte/protocol/models/JsonSchemaPrimitiveUtil.java index 6fa8cc4a70ef2..fefc12a851713 100644 --- a/airbyte-protocol/protocol-models/src/main/java/io/airbyte/protocol/models/JsonSchemaPrimitiveUtil.java +++ b/airbyte-protocol/protocol-models/src/main/java/io/airbyte/protocol/models/JsonSchemaPrimitiveUtil.java @@ -44,8 +44,13 @@ public enum JsonSchemaPrimitive { } public static final Set VO_JSON_SCHEMA_PRIMITIVE_SET = - ImmutableSet.of(JsonSchemaPrimitive.STRING, JsonSchemaPrimitive.NUMBER, - JsonSchemaPrimitive.OBJECT, JsonSchemaPrimitive.ARRAY, JsonSchemaPrimitive.BOOLEAN, JsonSchemaPrimitive.NULL); + ImmutableSet.of( + JsonSchemaPrimitive.STRING, + JsonSchemaPrimitive.NUMBER, + JsonSchemaPrimitive.OBJECT, + JsonSchemaPrimitive.ARRAY, + JsonSchemaPrimitive.BOOLEAN, + JsonSchemaPrimitive.NULL); public static final boolean isV0Schema(final JsonSchemaPrimitive type) { return VO_JSON_SCHEMA_PRIMITIVE_SET.contains(type); diff --git a/airbyte-protocol/protocol-models/src/main/java/io/airbyte/protocol/models/JsonSchemaReferenceTypes.java b/airbyte-protocol/protocol-models/src/main/java/io/airbyte/protocol/models/JsonSchemaReferenceTypes.java index 9286cbfbecca3..608ba7f97dead 100644 --- a/airbyte-protocol/protocol-models/src/main/java/io/airbyte/protocol/models/JsonSchemaReferenceTypes.java +++ 
b/airbyte-protocol/protocol-models/src/main/java/io/airbyte/protocol/models/JsonSchemaReferenceTypes.java @@ -4,8 +4,10 @@ package io.airbyte.protocol.models; +import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import io.airbyte.commons.json.Jsons; import java.util.Map; import java.util.Set; @@ -17,16 +19,25 @@ public class JsonSchemaReferenceTypes { "integer", "boolean"); - public static final String STRING_REFERENCE = "WellKnownTypes.json#/definitions/String"; - public static final String BINARY_DATA_REFERENCE = "WellKnownTypes.json#/definitions/BinaryData"; - public static final String NUMBER_REFERENCE = "WellKnownTypes.json#/definitions/Number"; - public static final String INTEGER_REFERENCE = "WellKnownTypes.json#/definitions/Integer"; - public static final String BOOLEAN_REFERENCE = "WellKnownTypes.json#/definitions/Boolean"; - public static final String DATE_REFERENCE = "WellKnownTypes.json#/definitions/Date"; - public static final String TIMESTAMP_WITH_TIMEZONE_REFERENCE = "WellKnownTypes.json#/definitions/TimestampWithTimezone"; - public static final String TIMESTAMP_WITHOUT_TIMEZONE_REFERENCE = "WellKnownTypes.json#/definitions/TimestampWithoutTimezone"; - public static final String TIME_WITH_TIMEZONE_REFERENCE = "WellKnownTypes.json#/definitions/TimeWithTimezone"; - public static final String TIME_WITHOUT_TIMEZONE_REFERENCE = "WellKnownTypes.json#/definitions/TimeWithoutTimezone"; + public static final String REF_KEY = "$ref"; + public static final String TYPE_KEY = "type"; + public static final String ONEOF_KEY = "oneOf"; + public static final String PROPERTIES_KEY = "properties"; + public static final String ITEMS_KEY = "items"; + public static final String OBJECT_TYPE = "object"; + public static final String ARRAY_TYPE = "array"; + + public static final String WELL_KNOWN_TYPES_FILENAME = "WellKnownTypes.json"; + public static final String 
STRING_REFERENCE = WELL_KNOWN_TYPES_FILENAME + "#/definitions/String"; + public static final String BINARY_DATA_REFERENCE = WELL_KNOWN_TYPES_FILENAME + "#/definitions/BinaryData"; + public static final String NUMBER_REFERENCE = WELL_KNOWN_TYPES_FILENAME + "#/definitions/Number"; + public static final String INTEGER_REFERENCE = WELL_KNOWN_TYPES_FILENAME + "#/definitions/Integer"; + public static final String BOOLEAN_REFERENCE = WELL_KNOWN_TYPES_FILENAME + "#/definitions/Boolean"; + public static final String DATE_REFERENCE = WELL_KNOWN_TYPES_FILENAME + "#/definitions/Date"; + public static final String TIMESTAMP_WITH_TIMEZONE_REFERENCE = WELL_KNOWN_TYPES_FILENAME + "#/definitions/TimestampWithTimezone"; + public static final String TIMESTAMP_WITHOUT_TIMEZONE_REFERENCE = WELL_KNOWN_TYPES_FILENAME + "#/definitions/TimestampWithoutTimezone"; + public static final String TIME_WITH_TIMEZONE_REFERENCE = WELL_KNOWN_TYPES_FILENAME + "#/definitions/TimeWithTimezone"; + public static final String TIME_WITHOUT_TIMEZONE_REFERENCE = WELL_KNOWN_TYPES_FILENAME + "#/definitions/TimeWithoutTimezone"; /** * This is primarily useful for migrating from protocol v0 to v1. 
It provides a mapping from the old @@ -44,4 +55,47 @@ public class JsonSchemaReferenceTypes { "boolean", BOOLEAN_REFERENCE, "date", DATE_REFERENCE); + public static final Map REFERENCE_TYPE_TO_OLD_TYPE = ImmutableMap.of( + TIMESTAMP_WITH_TIMEZONE_REFERENCE, + (ObjectNode) Jsons.deserialize( + """ + {"type": "string", "airbyte_type": "timestamp_with_timezone", "format": "date-time"} + """), + TIMESTAMP_WITHOUT_TIMEZONE_REFERENCE, (ObjectNode) Jsons.deserialize( + """ + {"type": "string", "airbyte_type": "timestamp_without_timezone", "format": "date-time"} + """), + TIME_WITH_TIMEZONE_REFERENCE, (ObjectNode) Jsons.deserialize( + """ + {"type": "string", "airbyte_type": "time_with_timezone", "format": "time"} + """), + TIME_WITHOUT_TIMEZONE_REFERENCE, (ObjectNode) Jsons.deserialize( + """ + {"type": "string", "airbyte_type": "time_without_timezone", "format": "time"} + """), + DATE_REFERENCE, (ObjectNode) Jsons.deserialize( + """ + {"type": "string", "format": "date"} + """), + INTEGER_REFERENCE, (ObjectNode) Jsons.deserialize( + """ + {"type": "number", "airbyte_type": "integer"} + """), + NUMBER_REFERENCE, (ObjectNode) Jsons.deserialize( + """ + {"type": "number"} + """), + BOOLEAN_REFERENCE, (ObjectNode) Jsons.deserialize( + """ + {"type": "boolean"} + """), + STRING_REFERENCE, (ObjectNode) Jsons.deserialize( + """ + {"type": "string"} + """), + BINARY_DATA_REFERENCE, (ObjectNode) Jsons.deserialize( + """ + {"type": "string", "contentEncoding": "base64"} + """)); + } diff --git a/airbyte-test-utils/src/main/java/io/airbyte/test/utils/AirbyteAcceptanceTestHarness.java b/airbyte-test-utils/src/main/java/io/airbyte/test/utils/AirbyteAcceptanceTestHarness.java index cb66fe314fa2a..049bfaf7ee436 100644 --- a/airbyte-test-utils/src/main/java/io/airbyte/test/utils/AirbyteAcceptanceTestHarness.java +++ b/airbyte-test-utils/src/main/java/io/airbyte/test/utils/AirbyteAcceptanceTestHarness.java @@ -585,6 +585,19 @@ public List retrieveSourceRecords(final Database database, 
final Strin return database.query(context -> context.fetch(String.format("SELECT * FROM %s;", table))) .stream() .map(Record::intoMap) + .map(rec -> { + // The protocol requires converting numbers to strings. source-postgres does that internally, + // but we're querying the DB directly, so we have to do it manually. + final Map stringifiedNumbers = new HashMap<>(); + for (final String key : rec.keySet()) { + Object o = rec.get(key); + if (o instanceof Number) { + o = o.toString(); + } + stringifiedNumbers.put(key, o); + } + return stringifiedNumbers; + }) .map(Jsons::jsonNode) .collect(Collectors.toList()); } diff --git a/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/BasicAcceptanceTests.java b/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/BasicAcceptanceTests.java index 16c1de7fa20dc..ec04195bedff4 100644 --- a/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/BasicAcceptanceTests.java +++ b/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/BasicAcceptanceTests.java @@ -46,7 +46,6 @@ import io.airbyte.api.client.model.generated.ConnectionScheduleType; import io.airbyte.api.client.model.generated.ConnectionState; import io.airbyte.api.client.model.generated.ConnectionStatus; -import io.airbyte.api.client.model.generated.DataType; import io.airbyte.api.client.model.generated.DestinationDefinitionIdRequestBody; import io.airbyte.api.client.model.generated.DestinationDefinitionIdWithWorkspaceId; import io.airbyte.api.client.model.generated.DestinationDefinitionRead; @@ -148,6 +147,9 @@ class BasicAcceptanceTests { private static PostgreSQLContainer sourcePsql; private static final String TYPE = "type"; + private static final String REF = "$ref"; + private static final String INTEGER_REFERENCE = "WellKnownTypes.json#/definitions/Integer"; + private static final String STRING_REFERENCE = "WellKnownTypes.json#/definitions/String"; private static final String PUBLIC = "public"; private static final 
String E2E_TEST_SOURCE = "E2E Test Source -"; private static final String INFINITE_FEED = "INFINITE_FEED"; @@ -317,8 +319,8 @@ void testDiscoverSourceSchema() throws ApiException { final AirbyteCatalog actual = testHarness.discoverSourceSchema(sourceId); final Map> fields = ImmutableMap.of( - COLUMN_ID, ImmutableMap.of(TYPE, DataType.NUMBER.getValue(), "airbyte_type", "integer"), - COLUMN_NAME, ImmutableMap.of(TYPE, DataType.STRING.getValue())); + COLUMN_ID, ImmutableMap.of(REF, INTEGER_REFERENCE), + COLUMN_NAME, ImmutableMap.of(REF, STRING_REFERENCE)); final JsonNode jsonSchema = Jsons.jsonNode(ImmutableMap.builder() .put(TYPE, "object") .put("properties", fields) @@ -569,8 +571,8 @@ void testIncrementalDedupeSync() throws Exception { // add new records and run again. final Database source = testHarness.getSourceDatabase(); final List expectedRawRecords = testHarness.retrieveSourceRecords(source, STREAM_NAME); - expectedRawRecords.add(Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, 6).put(COLUMN_NAME, "sherif").build())); - expectedRawRecords.add(Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, 7).put(COLUMN_NAME, "chris").build())); + expectedRawRecords.add(Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, "6").put(COLUMN_NAME, "sherif").build())); + expectedRawRecords.add(Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, "7").put(COLUMN_NAME, "chris").build())); source.query(ctx -> ctx.execute("UPDATE id_and_name SET id=6 WHERE name='sherif'")); source.query(ctx -> ctx.execute("INSERT INTO id_and_name(id, name) VALUES(7, 'chris')")); // retrieve latest snapshot of source records after modifications; the deduplicated table in @@ -623,7 +625,7 @@ void testIncrementalSync() throws Exception { final Database source = testHarness.getSourceDatabase(); // get contents of source before mutating records. 
final List expectedRecords = testHarness.retrieveSourceRecords(source, STREAM_NAME); - expectedRecords.add(Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, 6).put(COLUMN_NAME, GERALT).build())); + expectedRecords.add(Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, "6").put(COLUMN_NAME, GERALT).build())); // add a new record source.query(ctx -> ctx.execute("INSERT INTO id_and_name(id, name) VALUES(6, 'geralt')")); // mutate a record that was already synced with out updating its cursor value. if we are actually @@ -921,7 +923,7 @@ void testSyncAfterUpgradeToPerStreamState(final TestInfo testInfo) throws Except final Database sourceDatabase = testHarness.getSourceDatabase(); // get contents of source before mutating records. final List expectedRecords = testHarness.retrieveSourceRecords(sourceDatabase, STREAM_NAME); - expectedRecords.add(Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, 6).put(COLUMN_NAME, GERALT).build())); + expectedRecords.add(Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, "6").put(COLUMN_NAME, GERALT).build())); // add a new record sourceDatabase.query(ctx -> ctx.execute("INSERT INTO id_and_name(id, name) VALUES(6, 'geralt')")); // mutate a record that was already synced with out updating its cursor value. if we are actually @@ -1222,9 +1224,9 @@ void testIncrementalSyncMultipleStreams() throws Exception { testHarness.retrieveSourceRecords(source, STAGING_SCHEMA_NAME + "." + COOL_EMPLOYEES_TABLE_NAME); final List expectedRecordsAwesomePeople = testHarness.retrieveSourceRecords(source, STAGING_SCHEMA_NAME + "." 
+ AWESOME_PEOPLE_TABLE_NAME); - expectedRecordsIdAndName.add(Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, 6).put(COLUMN_NAME, GERALT).build())); - expectedRecordsCoolEmployees.add(Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, 6).put(COLUMN_NAME, GERALT).build())); - expectedRecordsAwesomePeople.add(Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, 3).put(COLUMN_NAME, GERALT).build())); + expectedRecordsIdAndName.add(Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, "6").put(COLUMN_NAME, GERALT).build())); + expectedRecordsCoolEmployees.add(Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, "6").put(COLUMN_NAME, GERALT).build())); + expectedRecordsAwesomePeople.add(Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, "3").put(COLUMN_NAME, GERALT).build())); // add a new record to each table source.query(ctx -> ctx.execute("INSERT INTO id_and_name(id, name) VALUES(6, 'geralt')")); source.query(ctx -> ctx.execute("INSERT INTO staging.cool_employees(id, name) VALUES(6, 'geralt')")); @@ -1459,8 +1461,8 @@ void testIncrementalDedupeSyncRemoveOneColumn() throws Exception { source.query(ctx -> ctx.execute("INSERT INTO id_and_name(id, name) VALUES(6, 'mike')")); source.query(ctx -> ctx.execute("INSERT INTO id_and_name(id, name) VALUES(7, 'chris')")); // The expected new raw records should only have the ID column. 
- expectedRawRecords.add(Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, 6).build())); - expectedRawRecords.add(Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, 7).build())); + expectedRawRecords.add(Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, "6").build())); + expectedRawRecords.add(Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, "7").build())); final JobInfoRead connectionSyncRead2 = apiClient.getConnectionApi() .syncConnection(new ConnectionIdRequestBody().connectionId(connectionId)); waitForSuccessfulJob(apiClient.getJobsApi(), connectionSyncRead2.getJob()); diff --git a/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/CdcAcceptanceTests.java b/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/CdcAcceptanceTests.java index 807d6a7c73a01..0127c1ad0e308 100644 --- a/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/CdcAcceptanceTests.java +++ b/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/CdcAcceptanceTests.java @@ -194,11 +194,11 @@ void testIncrementalCdcSync(final TestInfo testInfo) throws Exception { // new value and an updated_at time corresponding to this update query source.query(ctx -> ctx.execute("UPDATE id_and_name SET name='yennefer' WHERE id=2")); expectedIdAndNameRecords.add(new DestinationCdcRecordMatcher( - Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, 6).put(COLUMN_NAME, "geralt").build()), + Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, "6").put(COLUMN_NAME, "geralt").build()), beforeFirstUpdate, Optional.empty())); expectedIdAndNameRecords.add(new DestinationCdcRecordMatcher( - Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, 2).put(COLUMN_NAME, "yennefer").build()), + Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, "2").put(COLUMN_NAME, "yennefer").build()), beforeFirstUpdate, Optional.empty())); @@ -206,11 +206,11 @@ void testIncrementalCdcSync(final TestInfo testInfo) throws Exception { source.query(ctx -> ctx.execute("INSERT 
INTO color_palette(id, color) VALUES(4, 'yellow')")); source.query(ctx -> ctx.execute("UPDATE color_palette SET color='purple' WHERE id=2")); expectedColorPaletteRecords.add(new DestinationCdcRecordMatcher( - Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, 4).put(COLUMN_COLOR, "yellow").build()), + Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, "4").put(COLUMN_COLOR, "yellow").build()), beforeFirstUpdate, Optional.empty())); expectedColorPaletteRecords.add(new DestinationCdcRecordMatcher( - Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, 2).put(COLUMN_COLOR, "purple").build()), + Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, "2").put(COLUMN_COLOR, "purple").build()), beforeFirstUpdate, Optional.empty())); @@ -298,7 +298,7 @@ void testDeleteRecordCdcSync(final TestInfo testInfo) throws Exception { source.query(ctx -> ctx.execute("DELETE FROM id_and_name WHERE id=1")); final Map deletedRecordMap = new HashMap<>(); - deletedRecordMap.put(COLUMN_ID, 1); + deletedRecordMap.put(COLUMN_ID, "1"); deletedRecordMap.put(COLUMN_NAME, null); expectedIdAndNameRecords.add(new DestinationCdcRecordMatcher( Jsons.jsonNode(deletedRecordMap), @@ -431,13 +431,13 @@ void testPartialResetFromStreamSelection(final TestInfo testInfo) throws Excepti final Instant beforeInsert = Instant.now(); source.query(ctx -> ctx.execute("INSERT INTO id_and_name(id, name) VALUES(6, 'geralt')")); expectedIdAndNameRecords.add(new DestinationCdcRecordMatcher( - Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, 6).put(COLUMN_NAME, "geralt").build()), + Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, "6").put(COLUMN_NAME, "geralt").build()), beforeInsert, Optional.empty())); source.query(ctx -> ctx.execute("INSERT INTO color_palette(id, color) VALUES(4, 'yellow')")); expectedColorPaletteRecords.add(new DestinationCdcRecordMatcher( - Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, 4).put(COLUMN_COLOR, "yellow").build()), + Jsons.jsonNode(ImmutableMap.builder().put(COLUMN_ID, 
"4").put(COLUMN_COLOR, "yellow").build()), beforeInsert, Optional.empty())); diff --git a/docs/understanding-airbyte/supported-data-types.md b/docs/understanding-airbyte/supported-data-types.md index 976173491e501..591a3f23dac99 100644 --- a/docs/understanding-airbyte/supported-data-types.md +++ b/docs/understanding-airbyte/supported-data-types.md @@ -4,7 +4,7 @@ AirbyteRecords are required to conform to the Airbyte type system. This means th Because Airbyte's interfaces are JSON-based, this type system is realized using [JSON schemas](https://json-schema.org/). In order to work around some limitations of JSON schemas, we define our own types - see [well_known_types.yaml](https://github.com/airbytehq/airbyte/blob/111131a193359027d0081de1290eb4bb846662ef/airbyte-protocol/protocol-models/src/main/resources/airbyte_protocol/well_known_types.yaml). Sources should use `$ref` to reference these types, rather than directly defining JsonSchema entries. -In an older version of the protocol, we relied on an `airbyte_type` property in schemas. This has been replaced by the well-known type schemas. All "old-style" types map onto well-known types. For example, a legacy connector producing a field of type `{"type": "string", "airbyte_type": "timestamp_with_timezone"}` is treated as producing `{"$ref": "WellKnownTypes.json#definitions/TimestampWithTimezone"}`. +In an older version of the protocol, we relied on an `airbyte_type` property in schemas. This has been replaced by the well-known type schemas. All "old-style" types map onto well-known types. For example, a legacy connector producing a field of type `{"type": "string", "airbyte_type": "timestamp_with_timezone"}` is treated as producing `{"$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone"}`. This type system does not (generally) constrain values. The exception is in numeric types; `integer` and `number` fields must be representable within 64-bit primitives. 
@@ -14,16 +14,16 @@ This table summarizes the available types. See the [Specific Types](#specific-ty | Airbyte type | JSON Schema | Examples | | -------------------------------------------------------------- | ------------------------------------------------------------------------ | ------------------------------------------------------------------------------- | -| String | `{"$ref": "WellKnownTypes.json#definitions/String"}` | `"foo bar"` | -| Binary data, represented as a base64 string | `{"$ref": "WellKnownTypes.json#definitions/BinaryData"}` | `"Zm9vIGJhcgo="` | -| Boolean | `{"$ref": "WellKnownTypes.json#definitions/Boolean"}` | `true` or `false` | -| Date | `{"$ref": "WellKnownTypes.json#definitions/Date"}` | `"2021-01-23"`, `"2021-01-23 BC"` | -| Timestamp with timezone | `{"$ref": "WellKnownTypes.json#definitions/TimestampWithTimezone"}` | `"2022-11-22T01:23:45.123456+05:00"`, `"2022-11-22T01:23:45Z BC"` | -| Timestamp without timezone | `{"$ref": "WellKnownTypes.json#definitions/TimestampWithoutTimezone"}` | `"2022-11-22T01:23:45"`, `"2022-11-22T01:23:45.123456 BC"` | -| Time with timezone | `{"$ref": "WellKnownTypes.json#definitions/TimeWithTimezone"}` | `"01:23:45.123456+05:00"`, `"01:23:45Z"` | -| Time without timezone | `{"$ref": "WellKnownTypes.json#definitions/TimeWithoutTimezone"}` | `"01:23:45.123456"`, `"01:23:45"` | -| Integer | `{"$ref": "WellKnownTypes.json#definitions/Integer"}` | `42`, `NaN`, `Infinity`, `-Infinity` | -| Number | `{"$ref": "WellKnownTypes.json#definitions/Number"}` | `1234.56`, `NaN`, `Infinity`, `-Infinity` | +| String | `{"$ref": "WellKnownTypes.json#/definitions/String"}` | `"foo bar"` | +| Binary data, represented as a base64 string | `{"$ref": "WellKnownTypes.json#/definitions/BinaryData"}` | `"Zm9vIGJhcgo="` | +| Boolean | `{"$ref": "WellKnownTypes.json#/definitions/Boolean"}` | `true` or `false` | +| Date | `{"$ref": "WellKnownTypes.json#/definitions/Date"}` | `"2021-01-23"`, `"2021-01-23 BC"` | +| Timestamp with 
timezone | `{"$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone"}` | `"2022-11-22T01:23:45.123456+05:00"`, `"2022-11-22T01:23:45Z BC"` | +| Timestamp without timezone | `{"$ref": "WellKnownTypes.json#/definitions/TimestampWithoutTimezone"}` | `"2022-11-22T01:23:45"`, `"2022-11-22T01:23:45.123456 BC"` | +| Time with timezone | `{"$ref": "WellKnownTypes.json#/definitions/TimeWithTimezone"}` | `"01:23:45.123456+05:00"`, `"01:23:45Z"` | +| Time without timezone | `{"$ref": "WellKnownTypes.json#/definitions/TimeWithoutTimezone"}` | `"01:23:45.123456"`, `"01:23:45"` | +| Integer | `{"$ref": "WellKnownTypes.json#/definitions/Integer"}` | `42`, `NaN`, `Infinity`, `-Infinity` | +| Number | `{"$ref": "WellKnownTypes.json#/definitions/Number"}` | `1234.56`, `NaN`, `Infinity`, `-Infinity` | | Array | `{"type": "array"}`; optionally `items` and `additionalItems` | `[1, 2, 3]` | | Object | `{"type": "object"}`; optionally `properties` and `additionalProperties` | `{"foo": "bar"}` | | Union | `{"anyOf": [...]}` or `{"oneOf": [...]}` | | @@ -41,15 +41,15 @@ For example, a source could produce this `AirbyteStream` (remember that the `jso "type": "object", "properties": { "username": { - "$ref": "WellKnownTypes.json#definitions/String" + "$ref": "WellKnownTypes.json#/definitions/String" }, "age": { - "$ref": "WellKnownTypes.json#definitions/Integer" + "$ref": "WellKnownTypes.json#/definitions/Integer" }, "appointments": { "type": "array", "items": { - "$ref": "WellKnownTypes.json#definitions/TimestampWithTimezone" + "$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone" } } } @@ -83,7 +83,7 @@ As an escape hatch, destinations which cannot handle a certain type should just "appointments": { "type": "array", "items": { - "$ref": "WellKnownTypes.json#definitions/TimestampWithTimezone" + "$ref": "WellKnownTypes.json#/definitions/TimestampWithTimezone" } } } @@ -145,7 +145,7 @@ Integers are extended to accept infinity/-infinity/NaN values. 
Most sources will #### Arrays Arrays contain 0 or more items, which must have a defined type. These types should also conform to the type system. Arrays may require that all of their elements be the same type (`"items": {whatever type...}`), or they may require specific types for the first N entries (`"items": [{first type...}, {second type...}, ... , {Nth type...}]`, AKA tuple-type). -Tuple-typed arrays can configure the type of any additional elements using the `additionalItems` field; by default, any type is allowed. They may also pass a boolean to enable/disable additional elements, with `"additionalItems": true` being equivalent to `"additionalItems": {"$ref": "WellKnownTypes.json#definitions/String"}` and `"additionalItems": false` meaning that only the tuple-defined items are allowed. +Tuple-typed arrays can configure the type of any additional elements using the `additionalItems` field; by default, any type is allowed. They may also pass a boolean to enable/disable additional elements, with `"additionalItems": true` being equivalent to `"additionalItems": {"$ref": "WellKnownTypes.json#/definitions/String"}` and `"additionalItems": false` meaning that only the tuple-defined items are allowed. Destinations may have a difficult time supporting tuple-typed arrays without very specific handling, and as such are permitted to somewhat loosen their requirements. For example, many Avro-based destinations simply declare an array of a union of all allowed types, rather than requiring the correct type in each position of the array. @@ -158,4 +158,4 @@ In some cases, sources may want to use multiple types for the same field. For ex Note that JsonSchema's `allOf` combining structure is not accepted within the protocol, because all of the protocol type definitions are mutually exclusive. #### Untyped values -In some unusual cases, a property may not have type information associated with it. 
Sources must cast these properties to string, and discover them as `{"$ref": "WellKnownTypes.json#definitions/String"}`. +In some unusual cases, a property may not have type information associated with it. Sources must cast these properties to string, and discover them as `{"$ref": "WellKnownTypes.json#/definitions/String"}`. diff --git a/tools/bin/acceptance_test_kube.sh b/tools/bin/acceptance_test_kube.sh index d3e56fad57807..5820841a0027a 100755 --- a/tools/bin/acceptance_test_kube.sh +++ b/tools/bin/acceptance_test_kube.sh @@ -24,7 +24,7 @@ if [ -n "$CI" ]; then echo "Deploying fluentbit" helm repo add fluent https://fluent.github.io/helm-charts helm repo update fluent -sed -i "s/PLACEHOLDER/${WORKFLOW_RUN_ID}/" tools/bin/fluent_values.yaml +sed -i "s/PLACEHOLDER/${WORKFLOW_RUN_ID}/" tools/bin/fluent_values.yaml helm install --values tools/bin/fluent_values.yaml --set env[1].name="AWS_ACCESS_KEY_ID" --set env[1].value=$(echo "$AWS_S3_INTEGRATION_TEST_CREDS" | jq -r .aws_access_key_id) \ --set env[2].name="AWS_SECRET_ACCESS_KEY" --set env[2].value=$(echo "$AWS_S3_INTEGRATION_TEST_CREDS" | jq -r .aws_secret_access_key) \ --set env[3].name="AWS_S3_BUCKET" --set env[3].value=${AWS_S3_BUCKET} \ @@ -64,7 +64,7 @@ if [ -n "$CI" ]; then describe_pods; } # Uncomment for debugging. Warning, this is verbose. 
-# trap "mkdir -p /tmp/kubernetes_logs && write_all_logs" EXIT + # trap "mkdir -p /tmp/kubernetes_logs && write_all_logs" EXIT fi kubectl port-forward svc/airbyte-server-svc 8001:8001 & diff --git a/tools/bin/acceptance_test_kube_helm.sh b/tools/bin/acceptance_test_kube_helm.sh index d789b95124a1c..9b918821e762c 100755 --- a/tools/bin/acceptance_test_kube_helm.sh +++ b/tools/bin/acceptance_test_kube_helm.sh @@ -20,7 +20,7 @@ if [ -n "$CI" ]; then echo "Deploying fluentbit" helm repo add fluent https://fluent.github.io/helm-charts helm repo update fluent -sed -i "s/PLACEHOLDER/${WORKFLOW_RUN_ID}/" tools/bin/fluent_values.yaml +sed -i "s/PLACEHOLDER/${WORKFLOW_RUN_ID}/" tools/bin/fluent_values.yaml helm install --values tools/bin/fluent_values.yaml --set env[1].name="AWS_ACCESS_KEY_ID" --set env[1].value=$(echo "$AWS_S3_INTEGRATION_TEST_CREDS" | jq -r .aws_access_key_id) \ --set env[2].name="AWS_SECRET_ACCESS_KEY" --set env[2].value=$(echo "$AWS_S3_INTEGRATION_TEST_CREDS" | jq -r .aws_secret_access_key) \ --set env[3].name="AWS_S3_BUCKET" --set env[3].value=${AWS_S3_BUCKET} \ @@ -74,7 +74,7 @@ if [ -n "$CI" ]; then describe_pods; } # Uncomment for debugging. Warning, this is verbose. 
-# trap "mkdir -p /tmp/kubernetes_logs && write_all_logs" EXIT + # trap "mkdir -p /tmp/kubernetes_logs && write_all_logs" EXIT fi kubectl expose $(kubectl get po -l app.kubernetes.io/name=server -o name) --name exposed-server-svc --type NodePort --overrides '{ "apiVersion": "v1","spec":{"ports": [{"port":8001,"protocol":"TCP","targetPort":8001,"nodePort":8001}]}}' From 8889370cefa67e4e2912677b0719258c8ab5b4ed Mon Sep 17 00:00:00 2001 From: Jimmy Ma Date: Fri, 27 Jan 2023 10:18:47 -0800 Subject: [PATCH 08/11] On-the-fly migrations of persisted catalogs (#21757) * On the fly catalog migration for normalization activity * On the fly catalog migration for job persistence * On the fly migration for standard sync persistence * On the fly migration for airbyte catalogs * Refactor code to share JsonSchema traversal * Add V0 Data type search function * PMD and Format * Fix getOrInsertActorCatalog and ConfigRepositoryE2E tests * Null-proofing CatalogMigrationV1Helper * More null checks * Fix test * Format * Add data type v1 support to the FE * Changes AC test check to check exited ps (#21672) some docker compose changes no longer show exited processes. 
this broke out test this change should fix master tested in a runner that failed * Move wellknown types mapping to the utility function * use protocolv1 normalization --------- Co-authored-by: Topher Lubaway Co-authored-by: Edward Gao --- .../migrations/util/SchemaMigrations.java | 76 ++++++----- .../v1/CatalogMigrationV1Helper.java | 121 ++++++++++++++++++ .../migrations/v1/SchemaMigrationV1.java | 2 +- .../DestinationDefinitionsHandlerTest.java | 9 +- .../SourceDefinitionsHandlerTest.java | 5 - .../config-persistence/build.gradle | 1 + .../config/persistence/ConfigRepository.java | 6 +- .../config/persistence/DbConverter.java | 23 +++- .../ConfigRepositoryE2EReadWriteTest.java | 69 +++++++--- .../airbyte/config/persistence/MockData.java | 13 ++ .../seed/destination_definitions.yaml | 18 +-- .../job-persistence/build.gradle | 1 + .../job/DefaultJobPersistence.java | 29 ++++- .../src/core/domain/catalog/models.ts | 1 + .../domain/catalog/traverseSchemaToField.ts | 2 + airbyte-webapp/src/locales/en.json | 3 + .../src/utils/useTranslateDataType.ts | 20 ++- .../sync/NormalizationActivityImpl.java | 8 ++ 18 files changed, 326 insertions(+), 81 deletions(-) create mode 100644 airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/CatalogMigrationV1Helper.java diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/util/SchemaMigrations.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/util/SchemaMigrations.java index c90aae6e7e6e3..2c5df0f54fd3f 100644 --- a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/util/SchemaMigrations.java +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/util/SchemaMigrations.java @@ -50,38 +50,7 @@ public static void mutateSchemas(final Function matcher, fina // additionalProperties // else if oneof, allof, etc // but that sounds really verbose for no real benefit - final List 
subschemas = new ArrayList<>(); - - // array schemas - findSubschemas(subschemas, schema, "items"); - findSubschemas(subschemas, schema, "additionalItems"); - findSubschemas(subschemas, schema, "contains"); - - // object schemas - if (schema.hasNonNull("properties")) { - final ObjectNode propertiesNode = (ObjectNode) schema.get("properties"); - final Iterator> propertiesIterator = propertiesNode.fields(); - while (propertiesIterator.hasNext()) { - final Entry property = propertiesIterator.next(); - subschemas.add(property.getValue()); - } - } - if (schema.hasNonNull("patternProperties")) { - final ObjectNode propertiesNode = (ObjectNode) schema.get("patternProperties"); - final Iterator> propertiesIterator = propertiesNode.fields(); - while (propertiesIterator.hasNext()) { - final Entry property = propertiesIterator.next(); - subschemas.add(property.getValue()); - } - } - findSubschemas(subschemas, schema, "additionalProperties"); - - // combining restrictions - destinations have limited support for these, but we should handle the - // schemas correctly anyway - findSubschemas(subschemas, schema, "allOf"); - findSubschemas(subschemas, schema, "oneOf"); - findSubschemas(subschemas, schema, "anyOf"); - findSubschemas(subschemas, schema, "not"); + final List subschemas = findSubschemas(schema); // recurse into each subschema for (final JsonNode subschema : subschemas) { @@ -90,6 +59,49 @@ public static void mutateSchemas(final Function matcher, fina } } + /** + * Returns a list of all the direct children nodes to consider for subSchemas + * + * @param schema The JsonSchema node to start + * @return a list of the JsonNodes to be considered + */ + public static List findSubschemas(final JsonNode schema) { + final List subschemas = new ArrayList<>(); + + // array schemas + findSubschemas(subschemas, schema, "items"); + findSubschemas(subschemas, schema, "additionalItems"); + findSubschemas(subschemas, schema, "contains"); + + // object schemas + if 
(schema.hasNonNull("properties")) { + final ObjectNode propertiesNode = (ObjectNode) schema.get("properties"); + final Iterator> propertiesIterator = propertiesNode.fields(); + while (propertiesIterator.hasNext()) { + final Entry property = propertiesIterator.next(); + subschemas.add(property.getValue()); + } + } + if (schema.hasNonNull("patternProperties")) { + final ObjectNode propertiesNode = (ObjectNode) schema.get("patternProperties"); + final Iterator> propertiesIterator = propertiesNode.fields(); + while (propertiesIterator.hasNext()) { + final Entry property = propertiesIterator.next(); + subschemas.add(property.getValue()); + } + } + findSubschemas(subschemas, schema, "additionalProperties"); + + // combining restrictions - destinations have limited support for these, but we should handle the + // schemas correctly anyway + findSubschemas(subschemas, schema, "allOf"); + findSubschemas(subschemas, schema, "oneOf"); + findSubschemas(subschemas, schema, "anyOf"); + findSubschemas(subschemas, schema, "not"); + + return subschemas; + } + /** * If schema contains key, then grab the subschema(s) at schema[key] and add them to the subschemas * list. diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/CatalogMigrationV1Helper.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/CatalogMigrationV1Helper.java new file mode 100644 index 0000000000000..f5a1f78368728 --- /dev/null +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/CatalogMigrationV1Helper.java @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.commons.protocol.migrations.v1; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.protocol.migrations.util.SchemaMigrations; +import io.airbyte.protocol.models.AirbyteCatalog; +import io.airbyte.protocol.models.AirbyteStream; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.ConfiguredAirbyteStream; + +/** + * For the v0 to v1 migration, it appears that we are persisting some protocol objects without + * version. Until this gets addressed more properly, this class contains the helper functions used + * to handle this on the fly migration. + * + * Once persisted objects are versioned, this code should be deleted. + */ +public class CatalogMigrationV1Helper { + + /** + * Performs an in-place migration of the schema from v0 to v1 if v0 data types are detected + * + * @param configuredAirbyteCatalog to migrate + */ + public static void upgradeSchemaIfNeeded(final ConfiguredAirbyteCatalog configuredAirbyteCatalog) { + if (containsV0DataTypes(configuredAirbyteCatalog)) { + upgradeSchema(configuredAirbyteCatalog); + } + } + + /** + * Performs an in-place migration of the schema from v0 to v1 if v0 data types are detected + * + * @param airbyteCatalog to migrate + */ + public static void upgradeSchemaIfNeeded(final AirbyteCatalog airbyteCatalog) { + if (containsV0DataTypes(airbyteCatalog)) { + upgradeSchema(airbyteCatalog); + } + } + + /** + * Performs an in-place migration of the schema from v0 to v1 + * + * @param configuredAirbyteCatalog to migrate + */ + private static void upgradeSchema(final ConfiguredAirbyteCatalog configuredAirbyteCatalog) { + for (final var stream : configuredAirbyteCatalog.getStreams()) { + SchemaMigrationV1.upgradeSchema(stream.getStream().getJsonSchema()); + } + } + + /** + * Performs an in-place migration of the schema from v0 to v1 + * + * @param airbyteCatalog to migrate + */ + private static void upgradeSchema(final AirbyteCatalog 
airbyteCatalog) { + for (final var stream : airbyteCatalog.getStreams()) { + SchemaMigrationV1.upgradeSchema(stream.getJsonSchema()); + } + } + + /** + * Returns true if the first stream of the catalog contains v0 data types (only the first stream is inspected; a null catalog or a catalog without streams yields false) + */ + private static boolean containsV0DataTypes(final ConfiguredAirbyteCatalog configuredAirbyteCatalog) { + if (configuredAirbyteCatalog == null) { + return false; + } + + return configuredAirbyteCatalog + .getStreams() + .stream().findFirst() + .map(ConfiguredAirbyteStream::getStream) + .map(CatalogMigrationV1Helper::streamContainsV0DataTypes) + .orElse(false); + } + + /** + * Returns true if the first stream of the catalog contains v0 data types (only the first stream is inspected; a null catalog or a catalog without streams yields false) + */ + private static boolean containsV0DataTypes(final AirbyteCatalog airbyteCatalog) { + if (airbyteCatalog == null) { + return false; + } + + return airbyteCatalog + .getStreams() + .stream().findFirst() + .map(CatalogMigrationV1Helper::streamContainsV0DataTypes) + .orElse(false); + } + + private static boolean streamContainsV0DataTypes(final AirbyteStream airbyteStream) { + if (airbyteStream == null || airbyteStream.getJsonSchema() == null) { + return false; + } + return hasV0DataType(airbyteStream.getJsonSchema()); + } + + /** + * Performs a search for a v0 data type node, returns true at the first node found. 
+ */ + private static boolean hasV0DataType(final JsonNode schema) { + if (SchemaMigrationV1.isPrimitiveTypeDeclaration(schema)) { + return true; + } + + for (final JsonNode subSchema : SchemaMigrations.findSubschemas(schema)) { + if (hasV0DataType(subSchema)) { + return true; + } + } + return false; + } + +} diff --git a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/SchemaMigrationV1.java b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/SchemaMigrationV1.java index 6cbc37e2639b6..5a4e5fcbab005 100644 --- a/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/SchemaMigrationV1.java +++ b/airbyte-commons-protocol/src/main/java/io/airbyte/commons/protocol/migrations/v1/SchemaMigrationV1.java @@ -48,7 +48,7 @@ public static void downgradeSchema(final JsonNode schema) { * Detects any schema that looks like a primitive type declaration, e.g.: { "type": "string" } or { * "type": ["string", "object"] } */ - private static boolean isPrimitiveTypeDeclaration(final JsonNode schema) { + static boolean isPrimitiveTypeDeclaration(final JsonNode schema) { if (!schema.isObject() || !schema.hasNonNull(TYPE_KEY)) { return false; } diff --git a/airbyte-commons-server/src/test/java/io/airbyte/commons/server/handlers/DestinationDefinitionsHandlerTest.java b/airbyte-commons-server/src/test/java/io/airbyte/commons/server/handlers/DestinationDefinitionsHandlerTest.java index 4a65cc85aa936..8848e7cf4f228 100644 --- a/airbyte-commons-server/src/test/java/io/airbyte/commons/server/handlers/DestinationDefinitionsHandlerTest.java +++ b/airbyte-commons-server/src/test/java/io/airbyte/commons/server/handlers/DestinationDefinitionsHandlerTest.java @@ -41,8 +41,6 @@ import io.airbyte.commons.version.Version; import io.airbyte.config.ActorDefinitionResourceRequirements; import io.airbyte.config.ActorType; -import io.airbyte.config.Configs; -import io.airbyte.config.EnvConfigs; import 
io.airbyte.config.JobConfig.ConfigType; import io.airbyte.config.NormalizationDestinationDefinitionConfig; import io.airbyte.config.ResourceRequirements; @@ -79,6 +77,7 @@ class DestinationDefinitionsHandlerTest { private AirbyteGithubStore githubStore; private DestinationHandler destinationHandler; private UUID workspaceId; + private AirbyteProtocolVersionRange protocolVersionRange; @SuppressWarnings("unchecked") @BeforeEach @@ -91,6 +90,7 @@ void setUp() { githubStore = mock(AirbyteGithubStore.class); destinationHandler = mock(DestinationHandler.class); workspaceId = UUID.randomUUID(); + protocolVersionRange = new AirbyteProtocolVersionRange(new Version("0.0.0"), new Version("0.3.0")); destinationDefinitionsHandler = new DestinationDefinitionsHandler( configRepository, @@ -98,7 +98,7 @@ void setUp() { schedulerSynchronousClient, githubStore, destinationHandler, - new AirbyteProtocolVersionRange(new Version("0.0.0"), new Version("0.3.0"))); + protocolVersionRange); } private StandardDestinationDefinition generateDestinationDefinition() { @@ -540,9 +540,6 @@ void testUpdateDestination() throws ConfigNotFoundException, IOException, JsonVa verify(schedulerSynchronousClient).createGetSpecJob(newImageName, false); verify(configRepository).writeStandardDestinationDefinition(updatedDestination); - final Configs configs = new EnvConfigs(); - final AirbyteProtocolVersionRange protocolVersionRange = - new AirbyteProtocolVersionRange(configs.getAirbyteProtocolVersionMin(), configs.getAirbyteProtocolVersionMax()); verify(configRepository).clearUnsupportedProtocolVersionFlag(updatedDestination.getDestinationDefinitionId(), ActorType.DESTINATION, protocolVersionRange); } diff --git a/airbyte-commons-server/src/test/java/io/airbyte/commons/server/handlers/SourceDefinitionsHandlerTest.java b/airbyte-commons-server/src/test/java/io/airbyte/commons/server/handlers/SourceDefinitionsHandlerTest.java index 76c2c70dd036a..0fc880fbfc337 100644 --- 
a/airbyte-commons-server/src/test/java/io/airbyte/commons/server/handlers/SourceDefinitionsHandlerTest.java +++ b/airbyte-commons-server/src/test/java/io/airbyte/commons/server/handlers/SourceDefinitionsHandlerTest.java @@ -43,8 +43,6 @@ import io.airbyte.commons.version.Version; import io.airbyte.config.ActorDefinitionResourceRequirements; import io.airbyte.config.ActorType; -import io.airbyte.config.Configs; -import io.airbyte.config.EnvConfigs; import io.airbyte.config.JobConfig.ConfigType; import io.airbyte.config.ResourceRequirements; import io.airbyte.config.StandardSourceDefinition; @@ -489,9 +487,6 @@ void testUpdateSourceDefinition() throws ConfigNotFoundException, IOException, J verify(schedulerSynchronousClient).createGetSpecJob(newImageName, false); verify(configRepository).writeStandardSourceDefinition(updatedSource); - final Configs configs = new EnvConfigs(); - final AirbyteProtocolVersionRange protocolVersionRange = - new AirbyteProtocolVersionRange(configs.getAirbyteProtocolVersionMin(), configs.getAirbyteProtocolVersionMax()); verify(configRepository).clearUnsupportedProtocolVersionFlag(updatedSource.getSourceDefinitionId(), ActorType.SOURCE, protocolVersionRange); } diff --git a/airbyte-config/config-persistence/build.gradle b/airbyte-config/config-persistence/build.gradle index 70f6fb95ba31a..a4013408ed424 100644 --- a/airbyte-config/config-persistence/build.gradle +++ b/airbyte-config/config-persistence/build.gradle @@ -10,6 +10,7 @@ configurations.all { dependencies { implementation project(':airbyte-commons') implementation project(':airbyte-commons-docker') + implementation project(':airbyte-commons-protocol') implementation project(':airbyte-config:config-models') implementation project(':airbyte-db:db-lib') implementation project(':airbyte-db:jooq') diff --git a/airbyte-config/config-persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java 
b/airbyte-config/config-persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java index fa63c660af8a8..4492e4171e0e5 100644 --- a/airbyte-config/config-persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java +++ b/airbyte-config/config-persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java @@ -1206,8 +1206,10 @@ private Map findCatalogByHash(final String catalogHash, fi final Map result = new HashMap<>(); for (final Record record : records) { - final AirbyteCatalog catalog = Jsons.deserialize( - record.get(ACTOR_CATALOG.CATALOG).toString(), AirbyteCatalog.class); + // We do not apply the on-the-fly migration here because the only caller is getOrInsertActorCatalog + // which is using this to figure out if the catalog has already been inserted. Migrating on the fly + // here will cause us to add a duplicate each time we check for existence of a catalog. + final AirbyteCatalog catalog = Jsons.deserialize(record.get(ACTOR_CATALOG.CATALOG).toString(), AirbyteCatalog.class); result.put(record.get(ACTOR_CATALOG.ID), catalog); } return result; diff --git a/airbyte-config/config-persistence/src/main/java/io/airbyte/config/persistence/DbConverter.java b/airbyte-config/config-persistence/src/main/java/io/airbyte/config/persistence/DbConverter.java index 58f4b32496d24..12b787e110d9b 100644 --- a/airbyte-config/config-persistence/src/main/java/io/airbyte/config/persistence/DbConverter.java +++ b/airbyte-config/config-persistence/src/main/java/io/airbyte/config/persistence/DbConverter.java @@ -15,6 +15,7 @@ import io.airbyte.commons.enums.Enums; import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.protocol.migrations.v1.CatalogMigrationV1Helper; import io.airbyte.config.ActorCatalog; import io.airbyte.config.ActorCatalogFetchEvent; import io.airbyte.config.ActorCatalogWithUpdatedAt; @@ -41,6 +42,7 @@ import io.airbyte.config.StandardSync.Status; import io.airbyte.config.StandardWorkspace; import 
io.airbyte.config.WorkspaceServiceAccount; +import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.ConnectorSpecification; import java.time.LocalDateTime; @@ -68,8 +70,7 @@ public static StandardSync buildStandardSync(final Record record, final Listv1) + CatalogMigrationV1Helper.upgradeSchemaIfNeeded(configuredAirbyteCatalog); + return configuredAirbyteCatalog; + } + public static StandardWorkspace buildStandardWorkspace(final Record record) { final List notificationList = new ArrayList<>(); final List fetchedNotifications = Jsons.deserialize(record.get(WORKSPACE.NOTIFICATIONS).data(), List.class); @@ -222,18 +230,25 @@ public static SourceOAuthParameter buildSourceOAuthParameter(final Record record public static ActorCatalog buildActorCatalog(final Record record) { return new ActorCatalog() .withId(record.get(ACTOR_CATALOG.ID)) - .withCatalog(Jsons.deserialize(record.get(ACTOR_CATALOG.CATALOG).toString())) + .withCatalog(Jsons.jsonNode(parseAirbyteCatalog(record.get(ACTOR_CATALOG.CATALOG).toString()))) .withCatalogHash(record.get(ACTOR_CATALOG.CATALOG_HASH)); } public static ActorCatalogWithUpdatedAt buildActorCatalogWithUpdatedAt(final Record record) { return new ActorCatalogWithUpdatedAt() .withId(record.get(ACTOR_CATALOG.ID)) - .withCatalog(Jsons.deserialize(record.get(ACTOR_CATALOG.CATALOG).toString())) + .withCatalog(Jsons.jsonNode(parseAirbyteCatalog(record.get(ACTOR_CATALOG.CATALOG).toString()))) .withCatalogHash(record.get(ACTOR_CATALOG.CATALOG_HASH)) .withUpdatedAt(record.get(ACTOR_CATALOG_FETCH_EVENT.CREATED_AT, LocalDateTime.class).toEpochSecond(ZoneOffset.UTC)); } + public static AirbyteCatalog parseAirbyteCatalog(final String airbyteCatalogString) { + final AirbyteCatalog airbyteCatalog = Jsons.deserialize(airbyteCatalogString, AirbyteCatalog.class); + // On-the-fly migration of persisted data types related objects (protocol v0->v1) + 
CatalogMigrationV1Helper.upgradeSchemaIfNeeded(airbyteCatalog); + return airbyteCatalog; + } + public static ActorCatalogFetchEvent buildActorCatalogFetchEvent(final Record record) { return new ActorCatalogFetchEvent() .withActorId(record.get(ACTOR_CATALOG_FETCH_EVENT.ACTOR_ID)) diff --git a/airbyte-config/config-persistence/src/test/java/io/airbyte/config/persistence/ConfigRepositoryE2EReadWriteTest.java b/airbyte-config/config-persistence/src/test/java/io/airbyte/config/persistence/ConfigRepositoryE2EReadWriteTest.java index ba3d6be057e8b..1de35c619a839 100644 --- a/airbyte-config/config-persistence/src/test/java/io/airbyte/config/persistence/ConfigRepositoryE2EReadWriteTest.java +++ b/airbyte-config/config-persistence/src/test/java/io/airbyte/config/persistence/ConfigRepositoryE2EReadWriteTest.java @@ -141,6 +141,7 @@ void testFetchActorsUsingDefinition() throws IOException { @Test void testSimpleInsertActorCatalog() throws IOException, JsonValidationException, SQLException { + final String otherConfigHash = "OtherConfigHash"; final StandardWorkspace workspace = MockData.standardWorkspaces().get(0); final StandardSourceDefinition sourceDefinition = new StandardSourceDefinition() @@ -160,35 +161,52 @@ void testSimpleInsertActorCatalog() throws IOException, JsonValidationException, configRepository.writeSourceConnectionNoSecrets(source); final AirbyteCatalog actorCatalog = CatalogHelpers.createAirbyteCatalog("clothes", Field.of("name", JsonSchemaType.STRING)); + final AirbyteCatalog expectedActorCatalog = CatalogHelpers.createAirbyteCatalog("clothes", Field.of("name", JsonSchemaType.STRING_V1)); configRepository.writeActorCatalogFetchEvent( actorCatalog, source.getSourceId(), DOCKER_IMAGE_TAG, CONFIG_HASH); final Optional catalog = configRepository.getActorCatalog(source.getSourceId(), DOCKER_IMAGE_TAG, CONFIG_HASH); assertTrue(catalog.isPresent()); - assertEquals(actorCatalog, Jsons.object(catalog.get().getCatalog(), AirbyteCatalog.class)); + 
assertEquals(expectedActorCatalog, Jsons.object(catalog.get().getCatalog(), AirbyteCatalog.class)); assertFalse(configRepository.getActorCatalog(source.getSourceId(), "1.3.0", CONFIG_HASH).isPresent()); - assertFalse(configRepository.getActorCatalog(source.getSourceId(), DOCKER_IMAGE_TAG, "OtherConfigHash").isPresent()); + assertFalse(configRepository.getActorCatalog(source.getSourceId(), DOCKER_IMAGE_TAG, otherConfigHash).isPresent()); configRepository.writeActorCatalogFetchEvent(actorCatalog, source.getSourceId(), "1.3.0", CONFIG_HASH); final Optional catalogNewConnectorVersion = configRepository.getActorCatalog(source.getSourceId(), "1.3.0", CONFIG_HASH); assertTrue(catalogNewConnectorVersion.isPresent()); - assertEquals(actorCatalog, Jsons.object(catalogNewConnectorVersion.get().getCatalog(), AirbyteCatalog.class)); + assertEquals(expectedActorCatalog, Jsons.object(catalogNewConnectorVersion.get().getCatalog(), AirbyteCatalog.class)); - configRepository.writeActorCatalogFetchEvent(actorCatalog, source.getSourceId(), "1.2.0", "OtherConfigHash"); + configRepository.writeActorCatalogFetchEvent(actorCatalog, source.getSourceId(), "1.2.0", otherConfigHash); final Optional catalogNewConfig = - configRepository.getActorCatalog(source.getSourceId(), DOCKER_IMAGE_TAG, "OtherConfigHash"); + configRepository.getActorCatalog(source.getSourceId(), DOCKER_IMAGE_TAG, otherConfigHash); assertTrue(catalogNewConfig.isPresent()); - assertEquals(actorCatalog, Jsons.object(catalogNewConfig.get().getCatalog(), AirbyteCatalog.class)); + assertEquals(expectedActorCatalog, Jsons.object(catalogNewConfig.get().getCatalog(), AirbyteCatalog.class)); final int catalogDbEntry = database.query(ctx -> ctx.selectCount().from(ACTOR_CATALOG)).fetchOne().into(int.class); assertEquals(1, catalogDbEntry); + + // Writing the previous catalog with v1 data types + configRepository.writeActorCatalogFetchEvent(expectedActorCatalog, source.getSourceId(), "1.2.0", otherConfigHash); + final Optional 
catalogV1NewConfig = + configRepository.getActorCatalog(source.getSourceId(), DOCKER_IMAGE_TAG, otherConfigHash); + assertTrue(catalogV1NewConfig.isPresent()); + assertEquals(expectedActorCatalog, Jsons.object(catalogV1NewConfig.get().getCatalog(), AirbyteCatalog.class)); + + configRepository.writeActorCatalogFetchEvent(expectedActorCatalog, source.getSourceId(), "1.4.0", otherConfigHash); + final Optional catalogV1again = + configRepository.getActorCatalog(source.getSourceId(), DOCKER_IMAGE_TAG, otherConfigHash); + assertTrue(catalogV1again.isPresent()); + assertEquals(expectedActorCatalog, Jsons.object(catalogV1again.get().getCatalog(), AirbyteCatalog.class)); + + final int catalogDbEntry2 = database.query(ctx -> ctx.selectCount().from(ACTOR_CATALOG)).fetchOne().into(int.class); + assertEquals(2, catalogDbEntry2); } @Test void testListWorkspaceStandardSyncAll() throws IOException { - final List expectedSyncs = MockData.standardSyncs().subList(0, 4); + final List expectedSyncs = copyWithV1Types(MockData.standardSyncs().subList(0, 4)); final List actualSyncs = configRepository.listWorkspaceStandardSyncs( MockData.standardWorkspaces().get(0).getWorkspaceId(), true); @@ -199,10 +217,11 @@ void testListWorkspaceStandardSyncAll() throws IOException { void testListWorkspaceStandardSyncWithAllFiltering() throws IOException { final UUID workspaceId = MockData.standardWorkspaces().get(0).getWorkspaceId(); final StandardSyncQuery query = new StandardSyncQuery(workspaceId, List.of(MockData.SOURCE_ID_1), List.of(MockData.DESTINATION_ID_1), false); - final List expectedSyncs = MockData.standardSyncs().subList(0, 3).stream() - .filter(sync -> query.destinationId().contains(sync.getDestinationId())) - .filter(sync -> query.sourceId().contains(sync.getSourceId())) - .toList(); + final List expectedSyncs = copyWithV1Types( + MockData.standardSyncs().subList(0, 3).stream() + .filter(sync -> query.destinationId().contains(sync.getDestinationId())) + .filter(sync -> 
query.sourceId().contains(sync.getSourceId())) + .toList()); final List actualSyncs = configRepository.listWorkspaceStandardSyncs(query); assertSyncsMatch(expectedSyncs, actualSyncs); @@ -212,9 +231,10 @@ void testListWorkspaceStandardSyncWithAllFiltering() throws IOException { void testListWorkspaceStandardSyncDestinationFiltering() throws IOException { final UUID workspaceId = MockData.standardWorkspaces().get(0).getWorkspaceId(); final StandardSyncQuery query = new StandardSyncQuery(workspaceId, null, List.of(MockData.DESTINATION_ID_1), false); - final List expectedSyncs = MockData.standardSyncs().subList(0, 3).stream() - .filter(sync -> query.destinationId().contains(sync.getDestinationId())) - .toList(); + final List expectedSyncs = copyWithV1Types( + MockData.standardSyncs().subList(0, 3).stream() + .filter(sync -> query.destinationId().contains(sync.getDestinationId())) + .toList()); final List actualSyncs = configRepository.listWorkspaceStandardSyncs(query); assertSyncsMatch(expectedSyncs, actualSyncs); @@ -224,9 +244,10 @@ void testListWorkspaceStandardSyncDestinationFiltering() throws IOException { void testListWorkspaceStandardSyncSourceFiltering() throws IOException { final UUID workspaceId = MockData.standardWorkspaces().get(0).getWorkspaceId(); final StandardSyncQuery query = new StandardSyncQuery(workspaceId, List.of(MockData.SOURCE_ID_2), null, false); - final List expectedSyncs = MockData.standardSyncs().subList(0, 3).stream() - .filter(sync -> query.sourceId().contains(sync.getSourceId())) - .toList(); + final List expectedSyncs = copyWithV1Types( + MockData.standardSyncs().subList(0, 3).stream() + .filter(sync -> query.sourceId().contains(sync.getSourceId())) + .toList()); final List actualSyncs = configRepository.listWorkspaceStandardSyncs(query); assertSyncsMatch(expectedSyncs, actualSyncs); @@ -234,7 +255,7 @@ void testListWorkspaceStandardSyncSourceFiltering() throws IOException { @Test void testListWorkspaceStandardSyncExcludeDeleted() 
throws IOException { - final List expectedSyncs = MockData.standardSyncs().subList(0, 3); + final List expectedSyncs = copyWithV1Types(MockData.standardSyncs().subList(0, 3)); final List actualSyncs = configRepository.listWorkspaceStandardSyncs(MockData.standardWorkspaces().get(0).getWorkspaceId(), false); assertSyncsMatch(expectedSyncs, actualSyncs); @@ -456,12 +477,22 @@ void testMissingSourceOAuthByDefinitionId() throws IOException { @Test void testGetStandardSyncUsingOperation() throws IOException { final UUID operationId = MockData.standardSyncOperations().get(0).getOperationId(); - final List expectedSyncs = MockData.standardSyncs().subList(0, 3); + final List expectedSyncs = copyWithV1Types(MockData.standardSyncs().subList(0, 3)); final List actualSyncs = configRepository.listStandardSyncsUsingOperation(operationId); assertSyncsMatch(expectedSyncs, actualSyncs); } + private List copyWithV1Types(final List syncs) { + return syncs.stream() + .map(standardSync -> { + final StandardSync copiedStandardSync = Jsons.deserialize(Jsons.serialize(standardSync), StandardSync.class); + copiedStandardSync.setCatalog(MockData.getConfiguredCatalogWithV1DataTypes()); + return copiedStandardSync; + }) + .toList(); + } + private void assertSyncsMatch(final List expectedSyncs, final List actualSyncs) { assertEquals(expectedSyncs.size(), actualSyncs.size()); diff --git a/airbyte-config/config-persistence/src/test/java/io/airbyte/config/persistence/MockData.java b/airbyte-config/config-persistence/src/test/java/io/airbyte/config/persistence/MockData.java index 80d2be2d1a4a4..12d8ef84f451d 100644 --- a/airbyte-config/config-persistence/src/test/java/io/airbyte/config/persistence/MockData.java +++ b/airbyte-config/config-persistence/src/test/java/io/airbyte/config/persistence/MockData.java @@ -599,6 +599,19 @@ private static ConfiguredAirbyteCatalog getConfiguredCatalog() { return CatalogHelpers.toDefaultConfiguredCatalog(catalog); } + public static ConfiguredAirbyteCatalog 
getConfiguredCatalogWithV1DataTypes() { + final AirbyteCatalog catalog = new AirbyteCatalog().withStreams(List.of( + CatalogHelpers.createAirbyteStream( + "models", + "models_schema", + io.airbyte.protocol.models.Field.of("id", JsonSchemaType.NUMBER_V1), + io.airbyte.protocol.models.Field.of("make_id", JsonSchemaType.NUMBER_V1), + io.airbyte.protocol.models.Field.of("model", JsonSchemaType.STRING_V1)) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(List.of(List.of("id"))))); + return CatalogHelpers.toDefaultConfiguredCatalog(catalog); + } + public static List standardSyncStates() { final StandardSyncState standardSyncState1 = new StandardSyncState() .withConnectionId(CONNECTION_ID_1) diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index 98f3542a3a1fe..b6d81cfc9a96b 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -45,7 +45,7 @@ icon: bigquery.svg normalizationConfig: normalizationRepository: airbyte/normalization - normalizationTag: 0.2.25 + normalizationTag: protocolv1 normalizationIntegrationType: bigquery supportsDbt: true resourceRequirements: @@ -91,7 +91,7 @@ releaseStage: alpha normalizationConfig: normalizationRepository: airbyte/normalization-clickhouse - normalizationTag: 0.2.25 + normalizationTag: protocolv1 normalizationIntegrationType: clickhouse supportsDbt: true - name: Cloudflare R2 @@ -213,7 +213,7 @@ releaseStage: alpha normalizationConfig: normalizationRepository: airbyte/normalization-mssql - normalizationTag: 0.2.25 + normalizationTag: protocolv1 normalizationIntegrationType: mssql supportsDbt: true - name: MeiliSearch @@ -239,7 +239,7 @@ releaseStage: alpha normalizationConfig: normalizationRepository: airbyte/normalization-mysql - 
normalizationTag: 0.2.25 + normalizationTag: protocolv1 normalizationIntegrationType: mysql supportsDbt: true - name: Oracle @@ -251,7 +251,7 @@ releaseStage: alpha normalizationConfig: normalizationRepository: airbyte/normalization-oracle - normalizationTag: 0.2.25 + normalizationTag: protocolv1 normalizationIntegrationType: oracle supportsDbt: true - name: Postgres @@ -263,7 +263,7 @@ releaseStage: alpha normalizationConfig: normalizationRepository: airbyte/normalization - normalizationTag: 0.2.25 + normalizationTag: protocolv1 normalizationIntegrationType: postgres supportsDbt: true - name: Pulsar @@ -295,7 +295,7 @@ icon: redshift.svg normalizationConfig: normalizationRepository: airbyte/normalization-redshift - normalizationTag: 0.2.25 + normalizationTag: protocolv1 normalizationIntegrationType: redshift supportsDbt: true resourceRequirements: @@ -353,7 +353,7 @@ icon: snowflake.svg normalizationConfig: normalizationRepository: airbyte/normalization-snowflake - normalizationTag: 0.2.25 + normalizationTag: protocolv1 normalizationIntegrationType: snowflake supportsDbt: true resourceRequirements: @@ -407,7 +407,7 @@ releaseStage: alpha normalizationConfig: normalizationRepository: airbyte/normalization-tidb - normalizationTag: 0.2.25 + normalizationTag: protocolv1 normalizationIntegrationType: tidb supportsDbt: true - name: Typesense diff --git a/airbyte-persistence/job-persistence/build.gradle b/airbyte-persistence/job-persistence/build.gradle index b7f581912818d..e4c1f4e89247d 100644 --- a/airbyte-persistence/job-persistence/build.gradle +++ b/airbyte-persistence/job-persistence/build.gradle @@ -5,6 +5,7 @@ plugins { dependencies { implementation project(':airbyte-commons') implementation project(':airbyte-commons-docker') + implementation project(':airbyte-commons-protocol') implementation project(':airbyte-oauth') implementation project(':airbyte-config:config-models') implementation project(':airbyte-db:jooq') diff --git 
a/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/DefaultJobPersistence.java b/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/DefaultJobPersistence.java index 26b40fd3fc791..e3bbdc7362bd3 100644 --- a/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/DefaultJobPersistence.java +++ b/airbyte-persistence/job-persistence/src/main/java/io/airbyte/persistence/job/DefaultJobPersistence.java @@ -22,6 +22,7 @@ import com.google.common.collect.UnmodifiableIterator; import io.airbyte.commons.enums.Enums; import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.protocol.migrations.v1.CatalogMigrationV1Helper; import io.airbyte.commons.resources.MoreResources; import io.airbyte.commons.text.Names; import io.airbyte.commons.text.Sqls; @@ -34,6 +35,7 @@ import io.airbyte.config.JobConfig; import io.airbyte.config.JobConfig.ConfigType; import io.airbyte.config.JobOutput; +import io.airbyte.config.JobOutput.OutputType; import io.airbyte.config.NormalizationSummary; import io.airbyte.config.StreamSyncStats; import io.airbyte.config.SyncStats; @@ -917,7 +919,7 @@ private static Job getJobFromRecord(final Record record) { return new Job(record.get(JOB_ID, Long.class), Enums.toEnum(record.get("config_type", String.class), ConfigType.class).orElseThrow(), record.get("scope", String.class), - Jsons.deserialize(record.get("config", String.class), JobConfig.class), + parseJobConfigFromString(record.get("config", String.class)), new ArrayList(), JobStatus.valueOf(record.get("job_status", String.class).toUpperCase()), Optional.ofNullable(record.get("job_started_at")).map(value -> getEpoch(record, "started_at")).orElse(null), @@ -925,12 +927,24 @@ private static Job getJobFromRecord(final Record record) { getEpoch(record, "job_updated_at")); } + private static JobConfig parseJobConfigFromString(final String jobConfigString) { + final JobConfig jobConfig = Jsons.deserialize(jobConfigString, 
JobConfig.class); + // On-the-fly migration of persisted data types related objects (protocol v0->v1) + if (jobConfig.getConfigType() == ConfigType.SYNC && jobConfig.getSync() != null) { + CatalogMigrationV1Helper.upgradeSchemaIfNeeded(jobConfig.getSync().getConfiguredAirbyteCatalog()); + } else if (jobConfig.getConfigType() == ConfigType.RESET_CONNECTION && jobConfig.getResetConnection() != null) { + CatalogMigrationV1Helper.upgradeSchemaIfNeeded(jobConfig.getResetConnection().getConfiguredAirbyteCatalog()); + } + return jobConfig; + } + private static Attempt getAttemptFromRecord(final Record record) { + final String attemptOutputString = record.get("attempt_output", String.class); return new Attempt( record.get(ATTEMPT_NUMBER, int.class), record.get(JOB_ID, Long.class), Path.of(record.get("log_path", String.class)), - record.get("attempt_output", String.class) == null ? null : Jsons.deserialize(record.get("attempt_output", String.class), JobOutput.class), + attemptOutputString == null ? null : parseJobOutputFromString(attemptOutputString), Enums.toEnum(record.get("attempt_status", String.class), AttemptStatus.class).orElseThrow(), record.get("processing_task_queue", String.class), record.get("attempt_failure_summary", String.class) == null ? 
null @@ -942,6 +956,17 @@ private static Attempt getAttemptFromRecord(final Record record) { .orElse(null)); } + private static JobOutput parseJobOutputFromString(final String jobOutputString) { + final JobOutput jobOutput = Jsons.deserialize(jobOutputString, JobOutput.class); + // On-the-fly migration of persisted data types related objects (protocol v0->v1) + if (jobOutput.getOutputType() == OutputType.DISCOVER_CATALOG && jobOutput.getDiscoverCatalog() != null) { + CatalogMigrationV1Helper.upgradeSchemaIfNeeded(jobOutput.getDiscoverCatalog().getCatalog()); + } else if (jobOutput.getOutputType() == OutputType.SYNC && jobOutput.getSync() != null) { + CatalogMigrationV1Helper.upgradeSchemaIfNeeded(jobOutput.getSync().getOutputCatalog()); + } + return jobOutput; + } + private static List getAttemptsWithJobsFromResult(final Result result) { return result .stream() diff --git a/airbyte-webapp/src/core/domain/catalog/models.ts b/airbyte-webapp/src/core/domain/catalog/models.ts index f11a475e76f0d..fe4488338cc21 100644 --- a/airbyte-webapp/src/core/domain/catalog/models.ts +++ b/airbyte-webapp/src/core/domain/catalog/models.ts @@ -3,6 +3,7 @@ export interface SyncSchemaField { type: string; key: string; path: string[]; + $ref?: string; airbyte_type?: string; format?: string; fields?: SyncSchemaField[]; diff --git a/airbyte-webapp/src/core/domain/catalog/traverseSchemaToField.ts b/airbyte-webapp/src/core/domain/catalog/traverseSchemaToField.ts index ad4f920659584..724dd3afad103 100644 --- a/airbyte-webapp/src/core/domain/catalog/traverseSchemaToField.ts +++ b/airbyte-webapp/src/core/domain/catalog/traverseSchemaToField.ts @@ -4,6 +4,7 @@ import { SyncSchemaField } from "./models"; type AirbyteJsonSchema = JSONSchema7Definition & { airbyte_type?: string; + $ref?: string; }; export const traverseSchemaToField = ( @@ -40,6 +41,7 @@ const traverseJsonSchemaProperties = ( (Array.isArray(jsonSchema?.type) ? jsonSchema?.type.find((t) => t !== "null") ?? 
jsonSchema?.type[0] : jsonSchema?.type) ?? "null", + $ref: jsonSchema?.$ref, airbyte_type: jsonSchema?.airbyte_type, format: jsonSchema?.format, }, diff --git a/airbyte-webapp/src/locales/en.json b/airbyte-webapp/src/locales/en.json index d48e12c1b701f..6b27cb4b7bf51 100644 --- a/airbyte-webapp/src/locales/en.json +++ b/airbyte-webapp/src/locales/en.json @@ -626,9 +626,12 @@ "ui.stepIndicator.currentStep": "(current step)", "airbyte.datatype.string": "String", + "airbyte.datatype.binary_data": "Binary Data", "airbyte.datatype.date": "Date", "airbyte.datatype.timestamp_with_timezone": "Timestamp with Timezone", "airbyte.datatype.timestamp_without_timezone": "Datetime", + "airbyte.datatype.time_with_timezone": "Time with Timezone", + "airbyte.datatype.time_without_timezone": "Time", "airbyte.datatype.datetime": "Datetime", "airbyte.datatype.integer": "Integer", "airbyte.datatype.big_integer": "Big Integer", diff --git a/airbyte-webapp/src/utils/useTranslateDataType.ts b/airbyte-webapp/src/utils/useTranslateDataType.ts index e9b2b8e190473..3a15919bcd8ba 100644 --- a/airbyte-webapp/src/utils/useTranslateDataType.ts +++ b/airbyte-webapp/src/utils/useTranslateDataType.ts @@ -3,19 +3,40 @@ import { useIntl } from "react-intl"; export interface AirbyteConnectorData { type: string; + $ref?: string; format?: string; airbyte_type?: string; anyOf?: unknown[]; oneOf?: unknown[]; }

+const wellKnownTypesPrefix = "WellKnownTypes.json#/definitions/";
+// Maps the $ref of a well-known type to the data type key returned by getType.
+// NOTE(review): the values appear to be the suffixes of the "airbyte.datatype.*" messages in en.json - confirm.
+const wellKnownTypeToDataTypeKey = {
+  [`${wellKnownTypesPrefix}String`]: "string",
+  [`${wellKnownTypesPrefix}BinaryData`]: "binary_data",
+  [`${wellKnownTypesPrefix}Number`]: "number",
+  [`${wellKnownTypesPrefix}Integer`]: "integer",
+  [`${wellKnownTypesPrefix}Boolean`]: "boolean",
+  [`${wellKnownTypesPrefix}Date`]: "date",
+  [`${wellKnownTypesPrefix}TimestampWithTimezone`]: "timestamp_with_timezone",
+  [`${wellKnownTypesPrefix}TimestampWithoutTimezone`]: "timestamp_without_timezone",
+  [`${wellKnownTypesPrefix}TimeWithTimezone`]: "time_with_timezone",
+  [`${wellKnownTypesPrefix}TimeWithoutTimezone`]: "time_without_timezone",
+};
+
 const getType = (data: AirbyteConnectorData): string => {
   if (data.oneOf || data.anyOf) {
     return "union";
   }
-  if (!data.anyOf && !data.oneOf && !data.airbyte_type && !data.format && !data.type) {
+  if (!data.anyOf && !data.oneOf && !data.airbyte_type && !data.format && !data.type && !data.$ref) {
     return "unknown";
   }
+  // A $ref wins over airbyte_type/format/type; a $ref missing from the map yields "unknown".
+  if (data.$ref) {
+    return wellKnownTypeToDataTypeKey[data.$ref] ?? "unknown";
+  }
   return data.airbyte_type ?? data.format ?? data.type;
 };

diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java index a498162b8eef7..accb986d89a04 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java @@ -14,6 +14,7 @@ import io.airbyte.api.client.model.generated.JobIdRequestBody; import io.airbyte.commons.functional.CheckedSupplier; import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.protocol.migrations.v1.CatalogMigrationV1Helper; import io.airbyte.commons.temporal.CancellationHandler; import io.airbyte.commons.temporal.TemporalUtils; import io.airbyte.config.AirbyteConfigValidator; @@ -106,6 +107,13 @@ public NormalizationSummary normalize(final JobRunConfig jobRunConfig, final var fullDestinationConfig = secretsHydrator.hydrate(input.getDestinationConfiguration()); final var fullInput = Jsons.clone(input).withDestinationConfiguration(fullDestinationConfig); + // This should only be useful for syncs that started before the release that contained v1 migration. + // However, we lack the effective way to detect those syncs so this code should remain until we + // phase v0 out.
+ // Performance impact should be low considering the nature of the check compared to the time to run + // normalization. + CatalogMigrationV1Helper.upgradeSchemaIfNeeded(fullInput.getCatalog()); + final Supplier inputSupplier = () -> { airbyteConfigValidator.ensureAsRuntime(ConfigSchema.NORMALIZATION_INPUT, Jsons.jsonNode(fullInput)); return fullInput; From cfa20f1d38bfed8a18ed9b73092cc2e3214d6c7e Mon Sep 17 00:00:00 2001 From: Jimmy Ma Date: Fri, 27 Jan 2023 11:32:50 -0800 Subject: [PATCH 09/11] Update protocol support range (#21996) --- airbyte-bootloader/src/main/resources/application.yml | 2 +- airbyte-server/src/main/resources/application.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-bootloader/src/main/resources/application.yml b/airbyte-bootloader/src/main/resources/application.yml index 38ed361e5e0cb..9709b4b88e5f5 100644 --- a/airbyte-bootloader/src/main/resources/application.yml +++ b/airbyte-bootloader/src/main/resources/application.yml @@ -22,7 +22,7 @@ airbyte: target: range: min-version: ${AIRBYTE_PROTOCOL_VERSION_MIN:0.0.0} - max-version: ${AIRBYTE_PROTOCOL_VERSION_MAX:0.3.0} + max-version: ${AIRBYTE_PROTOCOL_VERSION_MAX:1.0.0} secret: persistence: ${SECRET_PERSISTENCE:TESTING_CONFIG_DB_TABLE} store: diff --git a/airbyte-server/src/main/resources/application.yml b/airbyte-server/src/main/resources/application.yml index 1c58a63ef5466..f8598499b43d8 100644 --- a/airbyte-server/src/main/resources/application.yml +++ b/airbyte-server/src/main/resources/application.yml @@ -81,7 +81,7 @@ airbyte: root: ${WORKSPACE_ROOT} protocol: min-version: ${AIRBYTE_PROTOCOL_VERSION_MIN:0.0.0} - max-version: ${AIRBYTE_PROTOCOL_VERSION_MAX:0.3.0} + max-version: ${AIRBYTE_PROTOCOL_VERSION_MAX:1.0.0} temporal: cloud: From cc2811673d3eecb1e930952d683a4659d8f8d58a Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Mon, 30 Jan 2023 08:10:13 -0800 Subject: [PATCH 10/11] bump normalization version to 0.3.0 --- 
.../seed/destination_definitions.yaml | 18 +++++++++--------- .../bases/base-normalization/Dockerfile | 2 +- .../basic-normalization.md | 1 + 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index b6d81cfc9a96b..5a50841c4bc2e 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -45,7 +45,7 @@ icon: bigquery.svg normalizationConfig: normalizationRepository: airbyte/normalization - normalizationTag: protocolv1 + normalizationTag: 0.3.0 normalizationIntegrationType: bigquery supportsDbt: true resourceRequirements: @@ -91,7 +91,7 @@ releaseStage: alpha normalizationConfig: normalizationRepository: airbyte/normalization-clickhouse - normalizationTag: protocolv1 + normalizationTag: 0.3.0 normalizationIntegrationType: clickhouse supportsDbt: true - name: Cloudflare R2 @@ -213,7 +213,7 @@ releaseStage: alpha normalizationConfig: normalizationRepository: airbyte/normalization-mssql - normalizationTag: protocolv1 + normalizationTag: 0.3.0 normalizationIntegrationType: mssql supportsDbt: true - name: MeiliSearch @@ -239,7 +239,7 @@ releaseStage: alpha normalizationConfig: normalizationRepository: airbyte/normalization-mysql - normalizationTag: protocolv1 + normalizationTag: 0.3.0 normalizationIntegrationType: mysql supportsDbt: true - name: Oracle @@ -251,7 +251,7 @@ releaseStage: alpha normalizationConfig: normalizationRepository: airbyte/normalization-oracle - normalizationTag: protocolv1 + normalizationTag: 0.3.0 normalizationIntegrationType: oracle supportsDbt: true - name: Postgres @@ -263,7 +263,7 @@ releaseStage: alpha normalizationConfig: normalizationRepository: airbyte/normalization - normalizationTag: protocolv1 + normalizationTag: 0.3.0 normalizationIntegrationType: postgres supportsDbt: 
true - name: Pulsar @@ -295,7 +295,7 @@ icon: redshift.svg normalizationConfig: normalizationRepository: airbyte/normalization-redshift - normalizationTag: protocolv1 + normalizationTag: 0.3.0 normalizationIntegrationType: redshift supportsDbt: true resourceRequirements: @@ -353,7 +353,7 @@ icon: snowflake.svg normalizationConfig: normalizationRepository: airbyte/normalization-snowflake - normalizationTag: protocolv1 + normalizationTag: 0.3.0 normalizationIntegrationType: snowflake supportsDbt: true resourceRequirements: @@ -407,7 +407,7 @@ releaseStage: alpha normalizationConfig: normalizationRepository: airbyte/normalization-tidb - normalizationTag: protocolv1 + normalizationTag: 0.3.0 normalizationIntegrationType: tidb supportsDbt: true - name: Typesense diff --git a/airbyte-integrations/bases/base-normalization/Dockerfile b/airbyte-integrations/bases/base-normalization/Dockerfile index 71cbb2f88b6e5..eb985d00e88db 100644 --- a/airbyte-integrations/bases/base-normalization/Dockerfile +++ b/airbyte-integrations/bases/base-normalization/Dockerfile @@ -28,5 +28,5 @@ WORKDIR /airbyte ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" ENTRYPOINT ["/airbyte/entrypoint.sh"] -LABEL io.airbyte.version=0.2.25 +LABEL io.airbyte.version=0.3.0 LABEL io.airbyte.name=airbyte/normalization diff --git a/docs/understanding-airbyte/basic-normalization.md b/docs/understanding-airbyte/basic-normalization.md index 305e382f75c6c..876b14edded22 100644 --- a/docs/understanding-airbyte/basic-normalization.md +++ b/docs/understanding-airbyte/basic-normalization.md @@ -353,6 +353,7 @@ Therefore, in order to "upgrade" to the desired normalization version, you need | Airbyte Version | Normalization Version | Date | Pull Request | Subject | 
|:----------------|:----------------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------| +| | 0.3.0 | 2023-01-30 | [\#19721](https://github.com/airbytehq/airbyte/pull/19721) | Update normalization to airbyte-protocol v1.0.0 | | | 0.2.25 | 2022-12-05 | [\#19573](https://github.com/airbytehq/airbyte/pull/19573) | Update Clickhouse dbt version | | | 0.2.24 | 2022-11-01 | [\#18015](https://github.com/airbytehq/airbyte/pull/18015) | Add a drop table hook that drops *_scd tables after overwrite/reset | | | 0.2.23 | 2022-10-12 | [\#17483](https://github.com/airbytehq/airbyte/pull/17483) (published in [\#17896](https://github.com/airbytehq/airbyte/pull/17896)) | Remove unnecessary `Native Port` config option | From b367f960b2d835dab2e01ecd47a5ccc37553194e Mon Sep 17 00:00:00 2001 From: Jimmy Ma Date: Mon, 30 Jan 2023 08:29:52 -0800 Subject: [PATCH 11/11] Add version check on normalization (#22048) * Add normalization min version check * Add visible for testing --- .../sync/NormalizationActivityImpl.java | 26 +++++++++++++++++ .../sync/NormalizationActivityImplTest.java | 28 +++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 airbyte-workers/src/test/java/io/airbyte/workers/temporal/sync/NormalizationActivityImplTest.java diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java index accb986d89a04..54ab44f99edd9 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/NormalizationActivityImpl.java @@ -9,6 +9,7 @@ import static 
io.airbyte.metrics.lib.ApmTraceConstants.Tags.DESTINATION_DOCKER_IMAGE_KEY; import static io.airbyte.metrics.lib.ApmTraceConstants.Tags.JOB_ID_KEY; +import com.google.common.annotations.VisibleForTesting; import datadog.trace.api.Trace; import io.airbyte.api.client.AirbyteApiClient; import io.airbyte.api.client.model.generated.JobIdRequestBody; @@ -17,6 +18,7 @@ import io.airbyte.commons.protocol.migrations.v1.CatalogMigrationV1Helper; import io.airbyte.commons.temporal.CancellationHandler; import io.airbyte.commons.temporal.TemporalUtils; +import io.airbyte.commons.version.Version; import io.airbyte.config.AirbyteConfigValidator; import io.airbyte.config.ConfigSchema; import io.airbyte.config.Configs.WorkerEnvironment; @@ -66,6 +68,8 @@ public class NormalizationActivityImpl implements NormalizationActivity { private final ResourceRequirements normalizationResourceRequirements; private final AirbyteApiClient airbyteApiClient; + private final static Version MINIMAL_VERSION_FOR_DATATYPES_V1 = new Version("0.3.0"); + public NormalizationActivityImpl(@Named("containerOrchestratorConfig") final Optional containerOrchestratorConfig, @Named("defaultWorkerConfigs") final WorkerConfigs workerConfigs, @Named("defaultProcessFactory") final ProcessFactory processFactory, @@ -107,6 +111,15 @@ public NormalizationSummary normalize(final JobRunConfig jobRunConfig, final var fullDestinationConfig = secretsHydrator.hydrate(input.getDestinationConfiguration()); final var fullInput = Jsons.clone(input).withDestinationConfiguration(fullDestinationConfig); + // Check the version of normalization + // We require at least version 0.3.0 to support data types v1. Using an older version would lead to + // all columns being typed as JSONB. We should fail before coercing the types into an unexpected + // form. 
+ if (!normalizationSupportsV1DataTypes(destinationLauncherConfig)) { + throw new IllegalStateException("Normalization is too old, a version >=\"0.3.0\" is required but got \"" + + destinationLauncherConfig.getNormalizationDockerImage() + "\" instead"); + } + // This should only be useful for syncs that started before the release that contained v1 migration. // However, we lack the effective way to detect those syncs so this code should remain until we // phase v0 out. @@ -152,6 +165,42 @@ public NormalizationInput generateNormalizationInput(final StandardSyncInput syn .withResourceRequirements(normalizationResourceRequirements); }

+  /**
+   * Tells whether the normalization image of the destination is recent enough for protocol v1 data
+   * types, based on the version found in the image tag.
+   *
+   * <p>
+   * The tag is the text following the last ':' of the image name, provided that ':' comes after the
+   * last '/', so that the port of a registry host (e.g. "localhost:5000/normalization") is not
+   * mistaken for a version. When no version can be read (no image, no tag, or a tag that isn't in a
+   * semver format), the image is assumed to be compatible, which keeps dev images usable.
+   *
+   * @param destinationLauncherConfig launcher config carrying the normalization docker image name
+   * @return the outcome of comparing the tag with {@link #MINIMAL_VERSION_FOR_DATATYPES_V1}, or true
+   *         when no version can be read from the image name
+   */
+  @VisibleForTesting
+  static boolean normalizationSupportsV1DataTypes(final IntegrationLauncherConfig destinationLauncherConfig) {
+    final String normalizationImage = destinationLauncherConfig.getNormalizationDockerImage();
+    // Without an image name there is no version to check; use the same lenient default as below.
+    if (normalizationImage == null) {
+      return true;
+    }
+    final int tagSeparator = normalizationImage.lastIndexOf(':');
+    // A ':' located before the last '/' belongs to the registry address, not to a tag.
+    if (tagSeparator < 0 || tagSeparator < normalizationImage.lastIndexOf('/')) {
+      return true;
+    }
+    try {
+      final Version normalizationVersion = new Version(normalizationImage.substring(tagSeparator + 1));
+      return normalizationVersion.greaterThanOrEqualTo(MINIMAL_VERSION_FOR_DATATYPES_V1);
+    } catch (final IllegalArgumentException e) {
+      // IllegalArgument here means that the version isn't in a semver format.
+      // The current behavior is to assume it supports v1 data types for dev purposes.
+      return true;
+    }
+  }
+
 private CheckedSupplier, Exception> getLegacyWorkerFactory( final IntegrationLauncherConfig destinationLauncherConfig, final JobRunConfig jobRunConfig) { diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/sync/NormalizationActivityImplTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/sync/NormalizationActivityImplTest.java new file mode 100644 index 0000000000000..9c589a456fe9f --- /dev/null +++ b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/sync/NormalizationActivityImplTest.java @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.workers.temporal.sync;
+
+import io.airbyte.persistence.job.models.IntegrationLauncherConfig;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+class NormalizationActivityImplTest {
+
+  @Test
+  void checkNormalizationDataTypesSupportFromVersionString() {
+    // Tags below 0.3.0 must be rejected; 0.3.0 and above, as well as "dev" and "protocolv1", must be accepted.
+    for (final String rejectedTag : new String[] {"0.2.5", "0.1.1"}) {
+      Assertions.assertFalse(NormalizationActivityImpl.normalizationSupportsV1DataTypes(withNormalizationVersion(rejectedTag)));
+    }
+    for (final String acceptedTag : new String[] {"0.3.0", "0.4.1", "dev", "protocolv1"}) {
+      Assertions.assertTrue(NormalizationActivityImpl.normalizationSupportsV1DataTypes(withNormalizationVersion(acceptedTag)));
+    }
+  }
+
+  private IntegrationLauncherConfig withNormalizationVersion(final String tag) {
+    return new IntegrationLauncherConfig().withNormalizationDockerImage("normalization:" + tag);
+  }
+
+}