From bf43f6264d679f1075e22ad814a90596d34934a0 Mon Sep 17 00:00:00 2001 From: "Pedro S. Lopez" Date: Wed, 2 Nov 2022 20:16:30 -0400 Subject: [PATCH] feat: generate full connector catalog json (#18562) * move combo catalog generator from cloud to oss, trigger on processResources * generate catalog * regenerate catalog * add test * add explicit gradle task for generating combo catalog * run format * ignore generated file from the formatter * update generated catalog * ignore oss catalog * fix ignore path --- .gitignore | 3 + airbyte-config/specs/build.gradle | 15 +++ .../CombinedConnectorCatalogGenerator.java | 109 ++++++++++++++++ ...CombinedConnectorCatalogGeneratorTest.java | 122 ++++++++++++++++++ build.gradle | 1 + 5 files changed, 250 insertions(+) create mode 100644 airbyte-config/specs/src/main/java/io/airbyte/config/specs/CombinedConnectorCatalogGenerator.java create mode 100644 airbyte-config/specs/src/test/java/io/airbyte/config/specs/CombinedConnectorCatalogGeneratorTest.java diff --git a/.gitignore b/.gitignore index 090995b262b4b..82fcdcf890c65 100644 --- a/.gitignore +++ b/.gitignore @@ -71,6 +71,9 @@ docs/SUMMARY.md # Files generated by unit tests **/specs_secrets_mask.yaml +# Files generated for uploading to GCS +airbyte-config/**/resources/seed/oss_catalog.json + # Helm charts .tgz dependencies charts/**/charts diff --git a/airbyte-config/specs/build.gradle b/airbyte-config/specs/build.gradle index 6dde4b8e7f498..e278115b6b781 100644 --- a/airbyte-config/specs/build.gradle +++ b/airbyte-config/specs/build.gradle @@ -7,6 +7,7 @@ dependencies { implementation project(':airbyte-commons') implementation project(':airbyte-commons-cli') + implementation project(':airbyte-commons-docker') implementation project(':airbyte-config:config-models') implementation project(':airbyte-protocol:protocol-models') implementation project(':airbyte-json-validation') @@ -32,4 +33,18 @@ task generateConnectorSpecsMask(type: JavaExec, dependsOn: generateSeedConnector 
project(":airbyte-config:init").tasks.processResources.dependsOn(generateConnectorSpecsMask)
 
+// Runs CombinedConnectorCatalogGenerator over the seed resources of :airbyte-config:init.
+// The generator writes its output file into the same directory it reads the seed files from.
+// Declared to run after generateSeedConnectorSpecs.
+task generateCombinedConnectorCatalog(type: JavaExec, dependsOn: generateSeedConnectorSpecs) {
+    classpath = sourceSets.main.runtimeClasspath
+
+    mainClass = 'io.airbyte.config.specs.CombinedConnectorCatalogGenerator'
+
+    // Directory holding the seed definition/spec files (also used as the output directory).
+    args '--seed-root'
+    args new File(project(":airbyte-config:init").projectDir, '/src/main/resources/seed')
+
+    // Name of the generated combined catalog file, created inside the seed root.
+    args '--output-filename'
+    args 'oss_catalog.json'
+}
+
+// Resource processing of :airbyte-config:init must wait for the combined catalog to be generated.
+project(":airbyte-config:init").tasks.processResources.dependsOn(generateCombinedConnectorCatalog)
+
 Task publishArtifactsTask = getPublishArtifactsTask("$rootProject.ext.version", project)
diff --git a/airbyte-config/specs/src/main/java/io/airbyte/config/specs/CombinedConnectorCatalogGenerator.java b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/CombinedConnectorCatalogGenerator.java
new file mode 100644
index 0000000000000..a208020bba014
--- /dev/null
+++ b/airbyte-config/specs/src/main/java/io/airbyte/config/specs/CombinedConnectorCatalogGenerator.java
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2022 Airbyte, Inc., all rights reserved.
+ */ + +package io.airbyte.config.specs; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.BooleanNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.annotations.VisibleForTesting; +import io.airbyte.commons.cli.Clis; +import io.airbyte.commons.docker.DockerUtils; +import io.airbyte.commons.io.IOs; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.util.MoreIterators; +import io.airbyte.commons.yaml.Yamls; +import io.airbyte.config.AirbyteConfigValidator; +import io.airbyte.config.CombinedConnectorCatalog; +import io.airbyte.config.ConfigSchema; +import io.airbyte.config.DockerImageSpec; +import io.airbyte.config.StandardDestinationDefinition; +import io.airbyte.config.StandardSourceDefinition; +import java.nio.file.Path; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; + +/** + * Generates a combined representation of the connector catalog that includes Sources, Destinations + * and their specs all in one. This connector catalog can then be served and loaded from a + * RemoteDefinitionsProvider. 
+ */
+public class CombinedConnectorCatalogGenerator {
+
+  private static final Option SEED_ROOT_OPTION = Option.builder("s").longOpt("seed-root").hasArg(true).required(true)
+      .desc("path to where seed resource files are stored").build();
+  private static final Option OUTPUT_FILENAME_OPTION = Option.builder("o").longOpt("output-filename").hasArg(true).required(true)
+      .desc("name for the generated catalog json file").build();
+  private static final Options OPTIONS = new Options().addOption(SEED_ROOT_OPTION).addOption(OUTPUT_FILENAME_OPTION);
+
+  /**
+   * Command-line entry point. Parses the required {@code --seed-root} and {@code --output-filename}
+   * options and generates the combined catalog inside the seed root directory.
+   *
+   * @param args command-line arguments
+   * @throws Exception if argument parsing or catalog generation fails
+   */
+  public static void main(final String[] args) throws Exception {
+    final CommandLine parsed = Clis.parse(args, OPTIONS);
+    // The seed root doubles as the output directory: the catalog is written next to the seed files.
+    final Path outputRoot = Path.of(parsed.getOptionValue(SEED_ROOT_OPTION.getOpt()));
+    final String outputFileName = parsed.getOptionValue(OUTPUT_FILENAME_OPTION.getOpt());
+
+    final CombinedConnectorCatalogGenerator combinedConnectorCatalogGenerator = new CombinedConnectorCatalogGenerator();
+    combinedConnectorCatalogGenerator.run(outputRoot, outputFileName);
+  }
+
+  /**
+   * Reads the source and destination seed definition and spec files found under {@code outputRoot},
+   * merges every spec into its definition, and writes the resulting {@link CombinedConnectorCatalog}
+   * as pretty-printed JSON to {@code outputRoot/outputFileName}.
+   *
+   * @param outputRoot directory that contains the seed files and receives the generated catalog
+   * @param outputFileName name of the catalog file to write
+   */
+  public void run(final Path outputRoot, final String outputFileName) {
+    final List<JsonNode> destinationDefinitionsJson = getSeedJson(outputRoot, SeedConnectorType.DESTINATION.getDefinitionFileName());
+    final List<JsonNode> destinationSpecsJson = getSeedJson(outputRoot, SeedConnectorType.DESTINATION.getSpecFileName());
+    final List<JsonNode> sourceDefinitionsJson = getSeedJson(outputRoot, SeedConnectorType.SOURCE.getDefinitionFileName());
+    final List<JsonNode> sourceSpecsJson = getSeedJson(outputRoot, SeedConnectorType.SOURCE.getSpecFileName());
+
+    // The definition nodes are mutated in place; afterwards each one carries its spec.
+    mergeSpecsIntoDefinitions(destinationDefinitionsJson, destinationSpecsJson, ConfigSchema.STANDARD_DESTINATION_DEFINITION);
+    mergeSpecsIntoDefinitions(sourceDefinitionsJson, sourceSpecsJson, ConfigSchema.STANDARD_SOURCE_DEFINITION);
+
+    final CombinedConnectorCatalog combinedCatalog = new CombinedConnectorCatalog()
+        .withDestinations(destinationDefinitionsJson.stream().map(j -> Jsons.object(j, StandardDestinationDefinition.class)).toList())
+        .withSources(sourceDefinitionsJson.stream().map(j -> Jsons.object(j, StandardSourceDefinition.class)).toList());
+
+    IOs.writeFile(outputRoot.resolve(outputFileName), Jsons.toPrettyString(Jsons.jsonNode(combinedCatalog)));
+  }
+
+  /**
+   * Reads a seed file and returns the child elements of its top-level node.
+   *
+   * @param root directory containing the seed file
+   * @param fileName name of the seed file inside {@code root}
+   * @return the elements of the deserialized document, in iteration order
+   */
+  private List<JsonNode> getSeedJson(final Path root, final String fileName) {
+    // The content is parsed with the YAML deserializer, so it is not named "json" here.
+    final String seedFileContents = IOs.readFile(root, fileName);
+    return MoreIterators.toList(Yamls.deserialize(seedFileContents).elements());
+  }
+
+  /**
+   * Updates all connector definitions with provided specs. Definitions are modified in place: each
+   * one receives a {@code spec} field, and a {@code public} field set to {@code true} when it has
+   * no non-null {@code public} value of its own. Every updated definition is then validated against
+   * {@code configSchema}.
+   *
+   * @param definitions - List of Source or Destination Definitions as generated in the seed files
+   * @param specs - List of connector specs as generated in the seed files (see
+   *        {@link DockerImageSpec})
+   * @param configSchema - schema each merged definition is validated against
+   * @throws UnsupportedOperationException if no spec exists for a definition's docker image
+   */
+  @VisibleForTesting
+  void mergeSpecsIntoDefinitions(final List<JsonNode> definitions, final List<JsonNode> specs, final ConfigSchema configSchema) {
+    // Index the specs by their fully tagged image name for lookup per definition.
+    final Map<String, JsonNode> specsByImage = specs.stream().collect(Collectors.toMap(
+        json -> json.get("dockerImage").asText(),
+        json -> json.get("spec")));
+
+    for (final JsonNode definition : definitions) {
+      final String dockerImage = DockerUtils.getTaggedImageName(
+          definition.get("dockerRepository").asText(),
+          definition.get("dockerImageTag").asText());
+      final JsonNode specConfigJson = specsByImage.get(dockerImage);
+
+      if (specConfigJson == null) {
+        throw new UnsupportedOperationException(String.format("A spec for docker image %s was not found", dockerImage));
+      }
+
+      ((ObjectNode) definition).set("spec", specConfigJson);
+
+      if (!definition.hasNonNull("public")) {
+        // All definitions in the catalog are public by default
+        ((ObjectNode) definition).set("public", BooleanNode.TRUE);
+      }
+
+      // NOTE(review): presumably raises a RuntimeException when the definition does not match the schema - confirm.
+      AirbyteConfigValidator.AIRBYTE_CONFIG_VALIDATOR.ensureAsRuntime(configSchema, definition);
+    }
+
+  }
+
+}
diff --git a/airbyte-config/specs/src/test/java/io/airbyte/config/specs/CombinedConnectorCatalogGeneratorTest.java
b/airbyte-config/specs/src/test/java/io/airbyte/config/specs/CombinedConnectorCatalogGeneratorTest.java
new file mode 100644
index 0000000000000..73c95138dd025
--- /dev/null
+++ b/airbyte-config/specs/src/test/java/io/airbyte/config/specs/CombinedConnectorCatalogGeneratorTest.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2022 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.config.specs;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.collect.ImmutableMap;
+import io.airbyte.commons.json.Jsons;
+import io.airbyte.config.ConfigSchema;
+import io.airbyte.config.DockerImageSpec;
+import io.airbyte.config.StandardDestinationDefinition;
+import io.airbyte.protocol.models.ConnectorSpecification;
+import java.util.List;
+import java.util.UUID;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Unit tests for {@link CombinedConnectorCatalogGenerator#mergeSpecsIntoDefinitions}.
+ */
+class CombinedConnectorCatalogGeneratorTest {
+
+  private static final UUID DEF_ID1 = UUID.randomUUID();
+  private static final UUID DEF_ID2 = UUID.randomUUID();
+  private static final String CONNECTOR_NAME1 = "connector1";
+  private static final String CONNECTOR_NAME2 = "connector2";
+  private static final String DOCUMENTATION_URL = "https://www.example.com";
+  private static final String DOCKER_REPOSITORY1 = "airbyte/connector1";
+  private static final String DOCKER_REPOSITORY2 = "airbyte/connector2";
+  private static final String DOCKER_TAG1 = "0.1.0";
+  private static final String DOCKER_TAG2 = "0.2.0";
+
+  private CombinedConnectorCatalogGenerator catalogGenerator;
+
+  @BeforeEach
+  void setup() {
+    catalogGenerator = new CombinedConnectorCatalogGenerator();
+  }
+
+  /**
+   * Each definition must end up carrying the spec registered for its docker image.
+   */
+  @Test
+  void testMergeSpecsIntoDefinitions() {
+    final StandardDestinationDefinition destinationDefinition1 = new StandardDestinationDefinition()
+        .withDestinationDefinitionId(DEF_ID1)
+        .withDockerRepository(DOCKER_REPOSITORY1)
+        .withDockerImageTag(DOCKER_TAG1)
+        .withName(CONNECTOR_NAME1)
+        .withDocumentationUrl(DOCUMENTATION_URL)
+        .withSpec(new ConnectorSpecification());
+    final StandardDestinationDefinition destinationDefinition2 = new StandardDestinationDefinition()
+        .withDestinationDefinitionId(DEF_ID2)
+        .withDockerRepository(DOCKER_REPOSITORY2)
+        .withDockerImageTag(DOCKER_TAG2)
+        .withName(CONNECTOR_NAME2)
+        .withDocumentationUrl(DOCUMENTATION_URL)
+        .withSpec(new ConnectorSpecification());
+    final DockerImageSpec destinationSpec1 = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG1)
+        .withSpec(new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of(
+            "foo1",
+            "bar1"))));
+    final DockerImageSpec destinationSpec2 = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY2 + ":" + DOCKER_TAG2)
+        .withSpec(new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of(
+            "foo2",
+            "bar2"))));
+
+    final List<JsonNode> definitions = List.of(Jsons.jsonNode(destinationDefinition1), Jsons.jsonNode(destinationDefinition2));
+    final List<JsonNode> specs = List.of(Jsons.jsonNode(destinationSpec1), Jsons.jsonNode(destinationSpec2));
+
+    // The list itself is immutable, but the JSON nodes it holds are updated in place by the merge.
+    catalogGenerator.mergeSpecsIntoDefinitions(definitions, specs, ConfigSchema.STANDARD_DESTINATION_DEFINITION);
+
+    final StandardDestinationDefinition expectedDefinition1 = new StandardDestinationDefinition()
+        .withDestinationDefinitionId(DEF_ID1)
+        .withDockerRepository(DOCKER_REPOSITORY1)
+        .withDockerImageTag(DOCKER_TAG1)
+        .withName(CONNECTOR_NAME1)
+        .withDocumentationUrl(DOCUMENTATION_URL)
+        .withSpec(destinationSpec1.getSpec());
+
+    final StandardDestinationDefinition expectedDefinition2 = new StandardDestinationDefinition()
+        .withDestinationDefinitionId(DEF_ID2)
+        .withDockerRepository(DOCKER_REPOSITORY2)
+        .withDockerImageTag(DOCKER_TAG2)
+        .withName(CONNECTOR_NAME2)
+        .withDocumentationUrl(DOCUMENTATION_URL)
+        .withSpec(destinationSpec2.getSpec());
+
+    assertEquals(Jsons.jsonNode(expectedDefinition1), definitions.get(0));
+    assertEquals(Jsons.jsonNode(expectedDefinition2), definitions.get(1));
+  }
+
+  /**
+   * A definition whose docker image has no matching spec must be rejected.
+   */
+  @Test
+  void testMergeSpecsIntoDefinitionsThrowsOnMissingSpec() {
+    final StandardDestinationDefinition destinationDefinition1 = new StandardDestinationDefinition()
+        .withDestinationDefinitionId(DEF_ID1)
+        .withDockerRepository(DOCKER_REPOSITORY1)
+        .withDockerImageTag(DOCKER_TAG1)
+        .withName(CONNECTOR_NAME1)
+        .withDocumentationUrl(DOCUMENTATION_URL)
+        .withSpec(new ConnectorSpecification());
+    final List<JsonNode> definitions = List.of(Jsons.jsonNode(destinationDefinition1));
+    final List<JsonNode> specs = List.of();
+
+    assertThrows(UnsupportedOperationException.class,
+        () -> catalogGenerator.mergeSpecsIntoDefinitions(definitions, specs, ConfigSchema.STANDARD_DESTINATION_DEFINITION));
+  }
+
+  /**
+   * A definition that only provides its docker repository and tag must fail the merge even though
+   * a spec exists for its image.
+   */
+  @Test
+  void testMergeSpecsIntoDefinitionsThrowsOnInvalidFormat() {
+    final JsonNode invalidDefinition = Jsons.jsonNode(ImmutableMap.of("dockerRepository", DOCKER_REPOSITORY1, "dockerImageTag", DOCKER_TAG1));
+    final DockerImageSpec destinationSpec = new DockerImageSpec().withDockerImage(DOCKER_REPOSITORY1 + ":" + DOCKER_TAG1)
+        .withSpec(new ConnectorSpecification().withConnectionSpecification(Jsons.jsonNode(ImmutableMap.of(
+            "foo1",
+            "bar1"))));
+
+    final List<JsonNode> definitions = List.of(Jsons.jsonNode(invalidDefinition));
+    final List<JsonNode> specs = List.of(Jsons.jsonNode(destinationSpec));
+
+    assertThrows(RuntimeException.class,
+        () -> catalogGenerator.mergeSpecsIntoDefinitions(definitions, specs, ConfigSchema.STANDARD_DESTINATION_DEFINITION));
+  }
+
+}
diff --git a/build.gradle b/build.gradle
index e0a1bc11ce2f5..727e71510f336 100644
--- a/build.gradle
+++ b/build.gradle
@@ -121,6 +121,7 @@ def createSpotlessTarget = { pattern ->
             'secrets',
             'charts', // Helm charts often have injected template strings that will fail general linting. Helm linting is done separately.
'resources/seed/*_specs.yaml', // Do not remove - this is necessary to prevent diffs in our github workflows, as the file diff check runs between the Format step and the Build step, the latter of which generates the file. + 'resources/seed/*_catalog.json', // Do not remove - this is also necessary to prevent diffs in our github workflows 'airbyte-integrations/connectors/source-amplitude/unit_tests/api_data/zipped.json', // Zipped file presents as non-UTF-8 making spotless sad 'airbyte-webapp', // The webapp module uses its own auto-formatter, so spotless is not necessary here 'airbyte-webapp-e2e-tests', // This module also uses its own auto-formatter