Skip to content

Commit 6bf9eba

Browse files
🐛 Updated source-mongodb-v2 performance and updated cursor for timestamp type (#8161)
* updated source-mongodb-v2 performance * updated code style * fixed remarks * fixed remarks * fixed remarks * updated strict encrypt source mongodb version * updated source mongodb work with empty collections * updated source mongodb timestamp cursor * updated mongodb source performance * fix code style * fix code style * updated tests and documentation * updated tests and documentation * updated tests and documentation * added vudangngoc changes * updated code style * updated code style
1 parent 7daa6a3 commit 6bf9eba

File tree

11 files changed

+82
-51
lines changed

11 files changed

+82
-51
lines changed

airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"sourceDefinitionId": "b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e",
33
"name": "MongoDb",
44
"dockerRepository": "airbyte/source-mongodb-v2",
5-
"dockerImageTag": "0.1.6",
5+
"dockerImageTag": "0.1.7",
66
"documentationUrl": "https://docs.airbyte.io/integrations/sources/mongodb-v2",
77
"icon": "mongodb.svg"
88
}

airbyte-config/init/src/main/resources/seed/source_definitions.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@
365365
- name: MongoDb
366366
sourceDefinitionId: b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e
367367
dockerRepository: airbyte/source-mongodb-v2
368-
dockerImageTag: 0.1.6
368+
dockerImageTag: 0.1.7
369369
documentationUrl: https://docs.airbyte.io/integrations/sources/mongodb-v2
370370
icon: mongodb.svg
371371
sourceType: database

airbyte-config/init/src/main/resources/seed/source_specs.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -3754,7 +3754,7 @@
37543754
supportsNormalization: false
37553755
supportsDBT: false
37563756
supported_destination_sync_modes: []
3757-
- dockerImage: "airbyte/source-mongodb-v2:0.1.6"
3757+
- dockerImage: "airbyte/source-mongodb-v2:0.1.7"
37583758
spec:
37593759
documentationUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2"
37603760
changelogUrl: "https://docs.airbyte.io/integrations/sources/mongodb-v2"

airbyte-db/lib/src/main/java/io/airbyte/db/mongodb/MongoUtils.java

+53-35
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,12 @@
1818
import io.airbyte.commons.util.MoreIterators;
1919
import io.airbyte.db.DataTypeUtils;
2020
import io.airbyte.protocol.models.JsonSchemaPrimitive;
21+
import java.util.ArrayList;
2122
import java.util.Arrays;
2223
import java.util.Collections;
2324
import java.util.HashMap;
24-
import java.util.HashSet;
2525
import java.util.List;
2626
import java.util.Map;
27-
import java.util.Set;
2827
import org.bson.BsonBinary;
2928
import org.bson.BsonDateTime;
3029
import org.bson.BsonDocument;
@@ -49,7 +48,6 @@ public class MongoUtils {
4948

5049
private static final String MISSING_TYPE = "missing";
5150
private static final String NULL_TYPE = "null";
52-
private static final String TYPE = "type";
5351
private static final String AIRBYTE_SUFFIX = "_aibyte_transform";
5452

5553
public static JsonSchemaPrimitive getType(final BsonType dataType) {
@@ -76,7 +74,7 @@ public static Object getBsonValue(final BsonType type, final String value) {
7674
case INT64 -> new BsonInt64(Long.parseLong(value));
7775
case DOUBLE -> new BsonDouble(Double.parseDouble(value));
7876
case DECIMAL128 -> Decimal128.parse(value);
79-
case TIMESTAMP -> new BsonTimestamp(Long.parseLong(value));
77+
case TIMESTAMP -> new BsonTimestamp(new DateTime(value).getValue());
8078
case DATE_TIME -> new BsonDateTime(new DateTime(value).getValue());
8179
case OBJECT_ID -> new ObjectId(value);
8280
case SYMBOL -> new Symbol(value);
@@ -121,7 +119,12 @@ private static ObjectNode readDocument(final BsonReader reader, final ObjectNode
121119

122120
private static void transformToStringIfMarked(final ObjectNode jsonNodes, final List<String> columnNames, final String fieldName) {
123121
if (columnNames.contains(fieldName + AIRBYTE_SUFFIX)) {
124-
jsonNodes.put(fieldName, jsonNodes.get(fieldName).asText());
122+
JsonNode data = jsonNodes.get(fieldName);
123+
if (data != null) {
124+
jsonNodes.put(fieldName, data.asText());
125+
} else {
126+
LOGGER.error("Field list out of sync, Document doesn't contain field: {}", fieldName);
127+
}
125128
}
126129
}
127130

@@ -184,9 +187,8 @@ public static Map<String, BsonType> getUniqueFields(final MongoCollection<Docume
184187
var allkeys = getFieldsName(collection);
185188
allkeys.forEach(key -> {
186189
var types = getTypes(collection, key);
187-
addUniqueType(result, collection, key, types);
190+
addUniqueType(result, key, types);
188191
});
189-
190192
return result;
191193
}
192194

@@ -202,42 +204,58 @@ private static List<String> getFieldsName(MongoCollection<Document> collection)
202204
}
203205
}
204206

207+
private static ArrayList<String> getTypes(MongoCollection<Document> collection, String name) {
208+
var fieldName = "$" + name;
209+
AggregateIterable<Document> output = collection.aggregate(Arrays.asList(
210+
new Document("$project", new Document("_id", 0).append("fieldType", new Document("$type", fieldName))),
211+
new Document("$group", new Document("_id", new Document("fieldType", "$fieldType"))
212+
.append("count", new Document("$sum", 1)))));
213+
var listOfTypes = new ArrayList<String>();
214+
var cursor = output.cursor();
215+
while (cursor.hasNext()) {
216+
var type = ((Document) cursor.next().get("_id")).get("fieldType").toString();
217+
if (!type.equals(MISSING_TYPE) && !type.equals(NULL_TYPE)) {
218+
listOfTypes.add(type);
219+
}
220+
}
221+
if (listOfTypes.isEmpty()) {
222+
listOfTypes.add(NULL_TYPE);
223+
}
224+
return listOfTypes;
225+
}
226+
205227
private static void addUniqueType(Map<String, BsonType> map,
206-
MongoCollection<Document> collection,
207228
String fieldName,
208-
Set<String> types) {
229+
List<String> types) {
209230
if (types.size() != 1) {
210231
map.put(fieldName + AIRBYTE_SUFFIX, BsonType.STRING);
211232
} else {
212-
var document = collection.find(new Document(fieldName,
213-
new Document("$type", types.stream().findFirst().get()))).first();
214-
var bsonDoc = toBsonDocument(document);
215-
try (final BsonReader reader = new BsonDocumentReader(bsonDoc)) {
216-
reader.readStartDocument();
217-
while (reader.readBsonType() != BsonType.END_OF_DOCUMENT) {
218-
if (reader.readName().equals(fieldName)) {
219-
final var fieldType = reader.getCurrentBsonType();
220-
map.put(fieldName, fieldType);
221-
}
222-
reader.skipValue();
223-
}
224-
reader.readEndDocument();
225-
}
233+
var type = types.get(0);
234+
map.put(fieldName, getBsonTypeByTypeAlias(type));
226235
}
227236
}
228237

229-
private static Set<String> getTypes(MongoCollection<Document> collection, String fieldName) {
230-
var searchField = "$" + fieldName;
231-
var docTypes = collection.aggregate(List.of(
232-
new Document("$project", new Document(TYPE, new Document("$type", searchField))))).cursor();
233-
Set<String> types = new HashSet<>();
234-
while (docTypes.hasNext()) {
235-
var type = String.valueOf(docTypes.next().get(TYPE));
236-
if (!MISSING_TYPE.equals(type) && !NULL_TYPE.equals(type)) {
237-
types.add(type);
238-
}
239-
}
240-
return types.isEmpty() ? Set.of(NULL_TYPE) : types;
238+
private static BsonType getBsonTypeByTypeAlias(String typeAlias) {
239+
return switch (typeAlias) {
240+
case "double" -> BsonType.DOUBLE;
241+
case "string" -> BsonType.STRING;
242+
case "objectId" -> BsonType.OBJECT_ID;
243+
case "array" -> BsonType.ARRAY;
244+
case "binData" -> BsonType.BINARY;
245+
case "bool" -> BsonType.BOOLEAN;
246+
case "date" -> BsonType.DATE_TIME;
247+
case "null" -> BsonType.NULL;
248+
case "regex" -> BsonType.REGULAR_EXPRESSION;
249+
case "dbPointer" -> BsonType.DB_POINTER;
250+
case "javascript" -> BsonType.JAVASCRIPT;
251+
case "symbol" -> BsonType.SYMBOL;
252+
case "javascriptWithScope" -> BsonType.JAVASCRIPT_WITH_SCOPE;
253+
case "int" -> BsonType.INT32;
254+
case "timestamp" -> BsonType.TIMESTAMP;
255+
case "long" -> BsonType.INT64;
256+
case "decimal" -> BsonType.DECIMAL128;
257+
default -> BsonType.STRING;
258+
};
241259
}
242260

243261
private static BsonDocument toBsonDocument(final Document document) {

airbyte-integrations/connectors/source-mongodb-strict-encrypt/Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar
88

99
RUN tar xf ${APPLICATION}.tar --strip-components=1
1010

11-
LABEL io.airbyte.version=0.1.4
11+
LABEL io.airbyte.version=0.1.5
1212
LABEL io.airbyte.name=airbyte/source-mongodb-strict-encrypt

airbyte-integrations/connectors/source-mongodb-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mongodb/MongodbSourceStrictEncryptAcceptanceTest.java

+9-4
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,11 @@ protected void setupEnvironment(final TestDestinationEnv environment) throws Exc
9090

9191
final MongoCollection<Document> collection = database.createCollection(COLLECTION_NAME);
9292
final var doc1 = new Document("id", "0001").append("name", "Test")
93-
.append("test", 10).append("test_array", new BsonArray(List.of(new BsonString("test"), new BsonString("mongo"))));
94-
final var doc2 = new Document("id", "0002").append("name", "Mongo").append("test", "test_value");
95-
final var doc3 = new Document("id", "0003").append("name", "Source").append("test", null);
93+
.append("test", 10).append("test_array", new BsonArray(List.of(new BsonString("test"), new BsonString("mongo"))))
94+
.append("double_test", 100.12).append("int_test", 100);
95+
final var doc2 = new Document("id", "0002").append("name", "Mongo").append("test", "test_value").append("int_test", 201);
96+
final var doc3 = new Document("id", "0003").append("name", "Source").append("test", null)
97+
.append("double_test", 212.11).append("int_test", 302);
9698

9799
collection.insertMany(List.of(doc1, doc2, doc3));
98100
}
@@ -122,7 +124,10 @@ protected ConfiguredAirbyteCatalog getConfiguredCatalog() throws Exception {
122124
Field.of("id", JsonSchemaPrimitive.STRING),
123125
Field.of("name", JsonSchemaPrimitive.STRING),
124126
Field.of("test", JsonSchemaPrimitive.STRING),
125-
Field.of("test_array", JsonSchemaPrimitive.ARRAY))
127+
Field.of("test_array", JsonSchemaPrimitive.ARRAY),
128+
Field.of("empty_test", JsonSchemaPrimitive.STRING),
129+
Field.of("double_test", JsonSchemaPrimitive.NUMBER),
130+
Field.of("int_test", JsonSchemaPrimitive.NUMBER))
126131
.withSupportedSyncModes(Lists.newArrayList(SyncMode.INCREMENTAL))
127132
.withDefaultCursorField(List.of("_id")))));
128133
}

airbyte-integrations/connectors/source-mongodb-v2/Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar
88

99
RUN tar xf ${APPLICATION}.tar --strip-components=1
1010

11-
LABEL io.airbyte.version=0.1.6
11+
LABEL io.airbyte.version=0.1.7
1212
LABEL io.airbyte.name=airbyte/source-mongodb-v2

airbyte-integrations/connectors/source-mongodb-v2/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MongoDbSourceAbstractAcceptanceTest.java

+4-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,10 @@ protected ConfiguredAirbyteCatalog getConfiguredCatalog() throws Exception {
5959
Field.of("id", JsonSchemaPrimitive.STRING),
6060
Field.of("name", JsonSchemaPrimitive.STRING),
6161
Field.of("test", JsonSchemaPrimitive.STRING),
62-
Field.of("test_array", JsonSchemaPrimitive.ARRAY))
62+
Field.of("test_array", JsonSchemaPrimitive.ARRAY),
63+
Field.of("empty_test", JsonSchemaPrimitive.STRING),
64+
Field.of("double_test", JsonSchemaPrimitive.NUMBER),
65+
Field.of("int_test", JsonSchemaPrimitive.NUMBER))
6366
.withSupportedSyncModes(Lists.newArrayList(SyncMode.INCREMENTAL))
6467
.withDefaultCursorField(List.of("_id")))));
6568
}

airbyte-integrations/connectors/source-mongodb-v2/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MongoDbSourceAtlasAcceptanceTest.java

+5-3
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,11 @@ protected void setupEnvironment(final TestDestinationEnv environment) throws Exc
5757

5858
final MongoCollection<Document> collection = database.createCollection(COLLECTION_NAME);
5959
final var doc1 = new Document("id", "0001").append("name", "Test")
60-
.append("test", 10).append("test_array", new BsonArray(List.of(new BsonString("test"), new BsonString("mongo"))));
61-
final var doc2 = new Document("id", "0002").append("name", "Mongo").append("test", "test_value");
62-
final var doc3 = new Document("id", "0003").append("name", "Source").append("test", null);
60+
.append("test", 10).append("test_array", new BsonArray(List.of(new BsonString("test"), new BsonString("mongo"))))
61+
.append("double_test", 100.12).append("int_test", 100);
62+
final var doc2 = new Document("id", "0002").append("name", "Mongo").append("test", "test_value").append("int_test", 201);
63+
final var doc3 = new Document("id", "0003").append("name", "Source").append("test", null)
64+
.append("double_test", 212.11).append("int_test", 302);
6365

6466
collection.insertMany(List.of(doc1, doc2, doc3));
6567
}

airbyte-integrations/connectors/source-mongodb-v2/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MongoDbSourceStandaloneAcceptanceTest.java

+5-3
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,11 @@ protected void setupEnvironment(final TestDestinationEnv environment) throws Exc
4949

5050
final MongoCollection<Document> collection = database.createCollection(COLLECTION_NAME);
5151
final var doc1 = new Document("id", "0001").append("name", "Test")
52-
.append("test", 10).append("test_array", new BsonArray(List.of(new BsonString("test"), new BsonString("mongo"))));
53-
final var doc2 = new Document("id", "0002").append("name", "Mongo").append("test", "test_value");
54-
final var doc3 = new Document("id", "0003").append("name", "Source").append("test", null);
52+
.append("test", 10).append("test_array", new BsonArray(List.of(new BsonString("test"), new BsonString("mongo"))))
53+
.append("double_test", 100.12).append("int_test", 100);
54+
final var doc2 = new Document("id", "0002").append("name", "Mongo").append("test", "test_value").append("int_test", 201);
55+
final var doc3 = new Document("id", "0003").append("name", "Source").append("test", null)
56+
.append("double_test", 212.11).append("int_test", 302);
5557

5658
collection.insertMany(List.of(doc1, doc2, doc3));
5759
}

docs/integrations/sources/mongodb-v2.md

+1
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ For more information regarding configuration parameters, please see [MongoDb Doc
102102

103103
| Version | Date | Pull Request | Subject |
104104
| :--- | :--- | :--- | :--- |
105+
| 0.1.7 | 2021-11-22 | [8161](https://github.com/airbytehq/airbyte/pull/8161) | Updated Performance and updated cursor for timestamp type |
105106
| 0.1.5 | 2021-11-17 | [8046](https://github.com/airbytehq/airbyte/pull/8046) | Added milliseconds to convert timestamp to datetime format |
106107
| 0.1.4 | 2021-11-15 | [7982](https://github.com/airbytehq/airbyte/pull/7982) | Updated Performance |
107108
| 0.1.3 | 2021-10-19 | [7160](https://github.com/airbytehq/airbyte/pull/7160) | Fixed nested document parsing |

0 commit comments

Comments
 (0)