-
Notifications
You must be signed in to change notification settings - Fork 4.4k
/
Copy pathReplicationJobOrchestrator.java
184 lines (163 loc) · 8.75 KB
/
ReplicationJobOrchestrator.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
/*
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.container_orchestrator.orchestrator;
import static io.airbyte.metrics.lib.ApmTraceConstants.JOB_ORCHESTRATOR_OPERATION_NAME;
import static io.airbyte.metrics.lib.ApmTraceConstants.Tags.DESTINATION_DOCKER_IMAGE_KEY;
import static io.airbyte.metrics.lib.ApmTraceConstants.Tags.JOB_ID_KEY;
import static io.airbyte.metrics.lib.ApmTraceConstants.Tags.SOURCE_DOCKER_IMAGE_KEY;
import datadog.trace.api.Trace;
import io.airbyte.api.client.generated.DestinationApi;
import io.airbyte.api.client.generated.SourceApi;
import io.airbyte.commons.features.FeatureFlagHelper;
import io.airbyte.commons.features.FeatureFlags;
import io.airbyte.commons.json.Jsons;
import io.airbyte.commons.logging.MdcScope;
import io.airbyte.commons.protocol.AirbyteMessageSerDeProvider;
import io.airbyte.commons.protocol.AirbyteMessageVersionedMigratorFactory;
import io.airbyte.commons.temporal.TemporalUtils;
import io.airbyte.commons.version.Version;
import io.airbyte.config.Configs;
import io.airbyte.config.ReplicationOutput;
import io.airbyte.config.StandardSyncInput;
import io.airbyte.metrics.lib.ApmTraceUtils;
import io.airbyte.metrics.lib.MetricClientFactory;
import io.airbyte.metrics.lib.MetricEmittingApps;
import io.airbyte.persistence.job.models.IntegrationLauncherConfig;
import io.airbyte.persistence.job.models.JobRunConfig;
import io.airbyte.workers.RecordSchemaValidator;
import io.airbyte.workers.WorkerConstants;
import io.airbyte.workers.WorkerMetricReporter;
import io.airbyte.workers.WorkerUtils;
import io.airbyte.workers.general.DefaultReplicationWorker;
import io.airbyte.workers.helper.ConnectorConfigUpdater;
import io.airbyte.workers.internal.AirbyteStreamFactory;
import io.airbyte.workers.internal.DefaultAirbyteDestination;
import io.airbyte.workers.internal.DefaultAirbyteSource;
import io.airbyte.workers.internal.DefaultAirbyteStreamFactory;
import io.airbyte.workers.internal.EmptyAirbyteSource;
import io.airbyte.workers.internal.NamespacingMapper;
import io.airbyte.workers.internal.VersionedAirbyteMessageBufferedWriterFactory;
import io.airbyte.workers.internal.VersionedAirbyteStreamFactory;
import io.airbyte.workers.internal.book_keeping.AirbyteMessageTracker;
import io.airbyte.workers.process.AirbyteIntegrationLauncher;
import io.airbyte.workers.process.KubePodProcess;
import io.airbyte.workers.process.ProcessFactory;
import io.airbyte.workers.sync.ReplicationLauncherWorker;
import java.lang.invoke.MethodHandles;
import java.nio.file.Path;
import java.util.Map;
import java.util.Optional;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * {@link JobOrchestrator} implementation that runs a replication (sync) job.
 *
 * On {@link #runJob()} it reads the {@link StandardSyncInput} and the source/destination
 * {@link IntegrationLauncherConfig} files, builds an {@link AirbyteIntegrationLauncher} for each
 * side, wires them into a {@link DefaultReplicationWorker}, runs the worker and returns the
 * resulting {@link ReplicationOutput} serialized as JSON.
 */
public class ReplicationJobOrchestrator implements JobOrchestrator<StandardSyncInput> {

  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  private final ProcessFactory processFactory;
  private final Configs configs;
  private final FeatureFlags featureFlags;
  private final AirbyteMessageSerDeProvider serDeProvider;
  private final AirbyteMessageVersionedMigratorFactory migratorFactory;
  private final JobRunConfig jobRunConfig;
  private final SourceApi sourceApi;
  private final DestinationApi destinationApi;

  /**
   * Creates an orchestrator bound to a single job run.
   *
   * @param configs supplies the workspace root used to compute the job root directory
   * @param processFactory handed to the source and destination integration launchers
   * @param featureFlags handed to the launchers, the source, the message tracker and the
   *        field-selection check
   * @param serDeProvider used when building versioned stream factories and the versioned
   *        destination writer factory
   * @param migratorFactory used together with {@code serDeProvider} for the versioned factories
   * @param jobRunConfig supplies the job id and attempt id of this run
   * @param sourceApi handed to the {@link ConnectorConfigUpdater}
   * @param destinationApi handed to the {@link ConnectorConfigUpdater}
   */
  public ReplicationJobOrchestrator(final Configs configs,
                                    final ProcessFactory processFactory,
                                    final FeatureFlags featureFlags,
                                    final AirbyteMessageSerDeProvider serDeProvider,
                                    final AirbyteMessageVersionedMigratorFactory migratorFactory,
                                    final JobRunConfig jobRunConfig,
                                    final SourceApi sourceApi,
                                    final DestinationApi destinationApi) {
    this.configs = configs;
    this.processFactory = processFactory;
    this.featureFlags = featureFlags;
    this.serDeProvider = serDeProvider;
    this.migratorFactory = migratorFactory;
    this.jobRunConfig = jobRunConfig;
    this.sourceApi = sourceApi;
    this.destinationApi = destinationApi;
  }

  /**
   * Returns the fixed display name of this orchestrator.
   *
   * @return the literal {@code "Replication"}
   */
  @Override
  public String getOrchestratorName() {
    return "Replication";
  }

  /**
   * Returns the type of the input this orchestrator consumes.
   *
   * @return {@code StandardSyncInput.class}
   */
  @Override
  public Class<StandardSyncInput> getInputClass() {
    return StandardSyncInput.class;
  }

  /**
   * Runs the replication job end to end: loads the inputs, builds the launchers, source and
   * replication worker, runs the worker and serializes its output.
   *
   * @return an {@link Optional} holding the JSON-serialized {@link ReplicationOutput}
   * @throws Exception propagated from reading the inputs, building the worker or running it
   */
  @Trace(operationName = JOB_ORCHESTRATOR_OPERATION_NAME)
  @Override
  public Optional<String> runJob() throws Exception {
    final var syncInput = readInput();

    final var sourceLauncherConfig = JobOrchestrator.readAndDeserializeFile(
        Path.of(KubePodProcess.CONFIG_DIR, ReplicationLauncherWorker.INIT_FILE_SOURCE_LAUNCHER_CONFIG),
        IntegrationLauncherConfig.class);

    final var destinationLauncherConfig = JobOrchestrator.readAndDeserializeFile(
        Path.of(KubePodProcess.CONFIG_DIR, ReplicationLauncherWorker.INIT_FILE_DESTINATION_LAUNCHER_CONFIG),
        IntegrationLauncherConfig.class);

    // Parameterized message: no manual concatenation or explicit toString() call.
    log.info("sourceLauncherConfig is: {}", sourceLauncherConfig);

    ApmTraceUtils.addTagsToTrace(
        Map.of(JOB_ID_KEY, jobRunConfig.getJobId(),
            DESTINATION_DOCKER_IMAGE_KEY, destinationLauncherConfig.getDockerImage(),
            SOURCE_DOCKER_IMAGE_KEY, sourceLauncherConfig.getDockerImage()));

    // At this moment, if either the source or the destination comes from a custom connector
    // image, the whole job is put into the isolated pool.
    // NOTE(review): Boolean.TRUE.equals(...) is used so that a missing flag is treated as
    // "not custom" instead of failing on unboxing. This assumes getIsCustomConnector() may
    // return a nullable Boolean - confirm against IntegrationLauncherConfig.
    final boolean useIsolatedPool = Boolean.TRUE.equals(sourceLauncherConfig.getIsCustomConnector())
        || Boolean.TRUE.equals(destinationLauncherConfig.getIsCustomConnector());

    log.info("Setting up source launcher...");
    final var sourceLauncher = new AirbyteIntegrationLauncher(
        sourceLauncherConfig.getJobId(),
        Math.toIntExact(sourceLauncherConfig.getAttemptId()),
        sourceLauncherConfig.getDockerImage(),
        processFactory,
        syncInput.getSourceResourceRequirements(),
        sourceLauncherConfig.getAllowedHosts(),
        useIsolatedPool,
        featureFlags);

    log.info("Setting up destination launcher...");
    final var destinationLauncher = new AirbyteIntegrationLauncher(
        destinationLauncherConfig.getJobId(),
        Math.toIntExact(destinationLauncherConfig.getAttemptId()),
        destinationLauncherConfig.getDockerImage(),
        processFactory,
        syncInput.getDestinationResourceRequirements(),
        destinationLauncherConfig.getAllowedHosts(),
        useIsolatedPool,
        featureFlags);

    log.info("Setting up source...");
    // Reset jobs use an empty source to induce resetting all data in the destination.
    final var airbyteSource =
        WorkerConstants.RESET_JOB_SOURCE_DOCKER_IMAGE_STUB.equals(sourceLauncherConfig.getDockerImage())
            ? new EmptyAirbyteSource(featureFlags.useStreamCapableState())
            : new DefaultAirbyteSource(sourceLauncher,
                getStreamFactory(sourceLauncherConfig.getProtocolVersion(), DefaultAirbyteSource.CONTAINER_LOG_MDC_BUILDER),
                featureFlags);

    // The metric client factory is initialized here, right before the client is fetched.
    MetricClientFactory.initialize(MetricEmittingApps.WORKER);
    final var metricClient = MetricClientFactory.getMetricClient();
    final var metricReporter = new WorkerMetricReporter(metricClient,
        sourceLauncherConfig.getDockerImage());

    log.info("Setting up replication worker...");
    final var replicationWorker = new DefaultReplicationWorker(
        jobRunConfig.getJobId(),
        Math.toIntExact(jobRunConfig.getAttemptId()),
        airbyteSource,
        new NamespacingMapper(syncInput.getNamespaceDefinition(), syncInput.getNamespaceFormat(), syncInput.getPrefix()),
        new DefaultAirbyteDestination(destinationLauncher,
            getStreamFactory(destinationLauncherConfig.getProtocolVersion(),
                DefaultAirbyteDestination.CONTAINER_LOG_MDC_BUILDER),
            new VersionedAirbyteMessageBufferedWriterFactory(serDeProvider, migratorFactory,
                destinationLauncherConfig.getProtocolVersion())),
        new AirbyteMessageTracker(featureFlags),
        new RecordSchemaValidator(WorkerUtils.mapStreamNamesToSchemas(syncInput)),
        metricReporter,
        new ConnectorConfigUpdater(sourceApi, destinationApi),
        FeatureFlagHelper.isFieldSelectionEnabledForWorkspace(featureFlags, syncInput.getWorkspaceId()));

    log.info("Running replication worker...");
    final var jobRoot = TemporalUtils.getJobRoot(configs.getWorkspaceRoot(),
        jobRunConfig.getJobId(), jobRunConfig.getAttemptId());
    final ReplicationOutput replicationOutput = replicationWorker.run(syncInput, jobRoot);

    log.info("Returning output...");
    return Optional.of(Jsons.serialize(replicationOutput));
  }

  /**
   * Picks the stream factory for a connector based on whether a protocol version is known.
   *
   * @param protocolVersion protocol version of the connector, or {@code null} when unknown
   * @param mdcScope MDC scope builder passed through to the created factory
   * @return a {@link VersionedAirbyteStreamFactory} when {@code protocolVersion} is non-null,
   *         otherwise a {@link DefaultAirbyteStreamFactory}
   */
  private AirbyteStreamFactory getStreamFactory(final Version protocolVersion, final MdcScope.Builder mdcScope) {
    if (protocolVersion == null) {
      return new DefaultAirbyteStreamFactory(mdcScope);
    }
    // NOTE(review): the meaning of the trailing Optional<Class> argument is defined by
    // VersionedAirbyteStreamFactory and is not visible from this file.
    return new VersionedAirbyteStreamFactory(serDeProvider, migratorFactory, protocolVersion, mdcScope,
        Optional.of(RuntimeException.class));
  }

}