18
18
import io .airbyte .commons .util .MoreIterators ;
19
19
import io .airbyte .db .DataTypeUtils ;
20
20
import io .airbyte .protocol .models .JsonSchemaPrimitive ;
21
+ import java .util .ArrayList ;
21
22
import java .util .Arrays ;
22
23
import java .util .Collections ;
23
24
import java .util .HashMap ;
24
- import java .util .HashSet ;
25
25
import java .util .List ;
26
26
import java .util .Map ;
27
- import java .util .Set ;
28
27
import org .bson .BsonBinary ;
29
28
import org .bson .BsonDateTime ;
30
29
import org .bson .BsonDocument ;
@@ -49,7 +48,6 @@ public class MongoUtils {
49
48
50
49
// Type alias that getTypes discards when collecting the types observed for a field.
private static final String MISSING_TYPE = "missing" ;
51
50
// Type alias that getTypes discards as well, and the fallback it returns when no other type was observed.
private static final String NULL_TYPE = "null" ;
52
- private static final String TYPE = "type" ;
53
51
// Suffix appended to a field name (see addUniqueType) when the field does not have exactly one type;
// transformToStringIfMarked looks for the suffixed name to decide whether to stringify the value.
// NOTE(review): the literal reads "_aibyte_" (missing 'r'); it is a runtime value, so confirm every
// producer/consumer before correcting the spelling.
private static final String AIRBYTE_SUFFIX = "_aibyte_transform" ;
54
52
55
53
public static JsonSchemaPrimitive getType (final BsonType dataType ) {
@@ -76,7 +74,7 @@ public static Object getBsonValue(final BsonType type, final String value) {
76
74
case INT64 -> new BsonInt64 (Long .parseLong (value ));
77
75
case DOUBLE -> new BsonDouble (Double .parseDouble (value ));
78
76
case DECIMAL128 -> Decimal128 .parse (value );
79
- case TIMESTAMP -> new BsonTimestamp (Long . parseLong (value ));
77
+ case TIMESTAMP -> new BsonTimestamp (new DateTime (value ). getValue ( ));
80
78
case DATE_TIME -> new BsonDateTime (new DateTime (value ).getValue ());
81
79
case OBJECT_ID -> new ObjectId (value );
82
80
case SYMBOL -> new Symbol (value );
@@ -121,7 +119,12 @@ private static ObjectNode readDocument(final BsonReader reader, final ObjectNode
121
119
122
120
private static void transformToStringIfMarked (final ObjectNode jsonNodes , final List <String > columnNames , final String fieldName ) {
123
121
if (columnNames .contains (fieldName + AIRBYTE_SUFFIX )) {
124
- jsonNodes .put (fieldName , jsonNodes .get (fieldName ).asText ());
122
+ JsonNode data = jsonNodes .get (fieldName );
123
+ if (data != null ) {
124
+ jsonNodes .put (fieldName , data .asText ());
125
+ } else {
126
+ LOGGER .error ("Field list out of sync, Document doesn't contain field: {}" , fieldName );
127
+ }
125
128
}
126
129
}
127
130
@@ -184,9 +187,8 @@ public static Map<String, BsonType> getUniqueFields(final MongoCollection<Docume
184
187
var allkeys = getFieldsName (collection );
185
188
allkeys .forEach (key -> {
186
189
var types = getTypes (collection , key );
187
- addUniqueType (result , collection , key , types );
190
+ addUniqueType (result , key , types );
188
191
});
189
-
190
192
return result ;
191
193
}
192
194
@@ -202,42 +204,58 @@ private static List<String> getFieldsName(MongoCollection<Document> collection)
202
204
}
203
205
}
204
206
207
+ private static ArrayList <String > getTypes (MongoCollection <Document > collection , String name ) {
208
+ var fieldName = "$" + name ;
209
+ AggregateIterable <Document > output = collection .aggregate (Arrays .asList (
210
+ new Document ("$project" , new Document ("_id" , 0 ).append ("fieldType" , new Document ("$type" , fieldName ))),
211
+ new Document ("$group" , new Document ("_id" , new Document ("fieldType" , "$fieldType" ))
212
+ .append ("count" , new Document ("$sum" , 1 )))));
213
+ var listOfTypes = new ArrayList <String >();
214
+ var cursor = output .cursor ();
215
+ while (cursor .hasNext ()) {
216
+ var type = ((Document ) cursor .next ().get ("_id" )).get ("fieldType" ).toString ();
217
+ if (!type .equals (MISSING_TYPE ) && !type .equals (NULL_TYPE )) {
218
+ listOfTypes .add (type );
219
+ }
220
+ }
221
+ if (listOfTypes .isEmpty ()) {
222
+ listOfTypes .add (NULL_TYPE );
223
+ }
224
+ return listOfTypes ;
225
+ }
226
+
205
227
private static void addUniqueType (Map <String , BsonType > map ,
206
- MongoCollection <Document > collection ,
207
228
String fieldName ,
208
- Set <String > types ) {
229
+ List <String > types ) {
209
230
if (types .size () != 1 ) {
210
231
map .put (fieldName + AIRBYTE_SUFFIX , BsonType .STRING );
211
232
} else {
212
- var document = collection .find (new Document (fieldName ,
213
- new Document ("$type" , types .stream ().findFirst ().get ()))).first ();
214
- var bsonDoc = toBsonDocument (document );
215
- try (final BsonReader reader = new BsonDocumentReader (bsonDoc )) {
216
- reader .readStartDocument ();
217
- while (reader .readBsonType () != BsonType .END_OF_DOCUMENT ) {
218
- if (reader .readName ().equals (fieldName )) {
219
- final var fieldType = reader .getCurrentBsonType ();
220
- map .put (fieldName , fieldType );
221
- }
222
- reader .skipValue ();
223
- }
224
- reader .readEndDocument ();
225
- }
233
+ var type = types .get (0 );
234
+ map .put (fieldName , getBsonTypeByTypeAlias (type ));
226
235
}
227
236
}
228
237
229
- private static Set <String > getTypes (MongoCollection <Document > collection , String fieldName ) {
230
- var searchField = "$" + fieldName ;
231
- var docTypes = collection .aggregate (List .of (
232
- new Document ("$project" , new Document (TYPE , new Document ("$type" , searchField ))))).cursor ();
233
- Set <String > types = new HashSet <>();
234
- while (docTypes .hasNext ()) {
235
- var type = String .valueOf (docTypes .next ().get (TYPE ));
236
- if (!MISSING_TYPE .equals (type ) && !NULL_TYPE .equals (type )) {
237
- types .add (type );
238
- }
239
- }
240
- return types .isEmpty () ? Set .of (NULL_TYPE ) : types ;
238
+ private static BsonType getBsonTypeByTypeAlias (String typeAlias ) {
239
+ return switch (typeAlias ) {
240
+ case "double" -> BsonType .DOUBLE ;
241
+ case "string" -> BsonType .STRING ;
242
+ case "objectId" -> BsonType .OBJECT_ID ;
243
+ case "array" -> BsonType .ARRAY ;
244
+ case "binData" -> BsonType .BINARY ;
245
+ case "bool" -> BsonType .BOOLEAN ;
246
+ case "date" -> BsonType .DATE_TIME ;
247
+ case "null" -> BsonType .NULL ;
248
+ case "regex" -> BsonType .REGULAR_EXPRESSION ;
249
+ case "dbPointer" -> BsonType .DB_POINTER ;
250
+ case "javascript" -> BsonType .JAVASCRIPT ;
251
+ case "symbol" -> BsonType .SYMBOL ;
252
+ case "javascriptWithScope" -> BsonType .JAVASCRIPT_WITH_SCOPE ;
253
+ case "int" -> BsonType .INT32 ;
254
+ case "timestamp" -> BsonType .TIMESTAMP ;
255
+ case "long" -> BsonType .INT64 ;
256
+ case "decimal" -> BsonType .DECIMAL128 ;
257
+ default -> BsonType .STRING ;
258
+ };
241
259
}
242
260
243
261
private static BsonDocument toBsonDocument (final Document document ) {
0 commit comments