Skip to content

Commit 28cda7a

Browse files
committed
Added BQSchemaToProtoSchema functionality along with test cases; checked for linting
1 parent 4d3bce6 commit 28cda7a

File tree

3 files changed

+261
-0
lines changed

3 files changed

+261
-0
lines changed

google-cloud-bigquerystorage/pom.xml

+6
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,12 @@
108108
<groupId>org.apache.commons</groupId>
109109
<artifactId>commons-lang3</artifactId>
110110
</dependency>
111+
<dependency>
112+
<groupId>org.json</groupId>
113+
<artifactId>json</artifactId>
114+
<version>20200518</version>
115+
</dependency>
116+
111117

112118
<!-- Test dependencies -->
113119
<dependency>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.google.cloud.bigquery.storage.v1alpha2;
17+
18+
import com.google.common.collect.ImmutableMap;
19+
import com.google.protobuf.DescriptorProtos.DescriptorProto;
20+
import com.google.protobuf.DescriptorProtos.FieldDescriptorProto;
21+
import com.google.protobuf.DescriptorProtos.FileDescriptorProto;
22+
import com.google.protobuf.Descriptors;
23+
import com.google.protobuf.Descriptors.Descriptor;
24+
import com.google.protobuf.Descriptors.FileDescriptor;
25+
import java.util.ArrayList;
26+
import java.util.List;
27+
28+
/**
29+
* A class that checks the schema compatibility between user schema in proto descriptor and Bigquery
30+
* table schema. If this check is passed, then user can write to BigQuery table using the user
31+
* schema, otherwise the write will fail.
32+
*
33+
* <p>The implementation as of now is not complete, which measn, if this check passed, there is
34+
* still a possbility of writing will fail.
35+
*/
36+
public class JsonToProtoConverter {
37+
private static ImmutableMap<Table.TableFieldSchema.Mode, FieldDescriptorProto.Label>
38+
BQTableSchemaModeMap =
39+
ImmutableMap.of(
40+
Table.TableFieldSchema.Mode.NULLABLE, FieldDescriptorProto.Label.LABEL_OPTIONAL,
41+
Table.TableFieldSchema.Mode.REPEATED, FieldDescriptorProto.Label.LABEL_REPEATED,
42+
Table.TableFieldSchema.Mode.REQUIRED, FieldDescriptorProto.Label.LABEL_REQUIRED);
43+
44+
private static ImmutableMap<Table.TableFieldSchema.Type, FieldDescriptorProto.Type>
45+
BQTableSchemaTypeMap =
46+
new ImmutableMap.Builder<Table.TableFieldSchema.Type, FieldDescriptorProto.Type>()
47+
.put(Table.TableFieldSchema.Type.BOOL, FieldDescriptorProto.Type.TYPE_BOOL)
48+
.put(Table.TableFieldSchema.Type.BYTES, FieldDescriptorProto.Type.TYPE_BYTES)
49+
.put(Table.TableFieldSchema.Type.DATE, FieldDescriptorProto.Type.TYPE_INT64)
50+
.put(Table.TableFieldSchema.Type.DATETIME, FieldDescriptorProto.Type.TYPE_INT64)
51+
.put(Table.TableFieldSchema.Type.DOUBLE, FieldDescriptorProto.Type.TYPE_DOUBLE)
52+
.put(Table.TableFieldSchema.Type.GEOGRAPHY, FieldDescriptorProto.Type.TYPE_BYTES)
53+
.put(Table.TableFieldSchema.Type.INT64, FieldDescriptorProto.Type.TYPE_INT64)
54+
.put(Table.TableFieldSchema.Type.NUMERIC, FieldDescriptorProto.Type.TYPE_DOUBLE)
55+
.put(Table.TableFieldSchema.Type.STRING, FieldDescriptorProto.Type.TYPE_STRING)
56+
.put(Table.TableFieldSchema.Type.STRUCT, FieldDescriptorProto.Type.TYPE_MESSAGE)
57+
.put(Table.TableFieldSchema.Type.TIME, FieldDescriptorProto.Type.TYPE_INT64)
58+
.put(Table.TableFieldSchema.Type.TIMESTAMP, FieldDescriptorProto.Type.TYPE_INT64)
59+
.build();
60+
61+
/**
62+
* Converts Table.TableSchema to a Descriptors.Descriptor object.
63+
*
64+
* @param BQTableSchema
65+
* @throws Descriptors.DescriptorValidationException
66+
*/
67+
public static Descriptor BQTableSchemaToProtoSchema(Table.TableSchema BQTableSchema)
68+
throws Descriptors.DescriptorValidationException {
69+
Descriptor descriptor = BQTableSchemaToProtoSchemaImpl(BQTableSchema, "root");
70+
return descriptor;
71+
}
72+
73+
/**
74+
* Implementation that converts a Table.TableSchema to a Descriptors.Descriptor object.
75+
*
76+
* @param BQTableSchema
77+
* @param scope Keeps track of current scope to prevent repeated naming while constructing
78+
* descriptor.
79+
* @throws Descriptors.DescriptorValidationException
80+
*/
81+
private static Descriptor BQTableSchemaToProtoSchemaImpl(
82+
Table.TableSchema BQTableSchema, String scope)
83+
throws Descriptors.DescriptorValidationException {
84+
List<FileDescriptor> dependenciesList = new ArrayList<FileDescriptor>();
85+
List<FieldDescriptorProto> fields = new ArrayList<FieldDescriptorProto>();
86+
int index = 1;
87+
for (Table.TableFieldSchema BQTableField : BQTableSchema.getFieldsList()) {
88+
if (BQTableField.getType() == Table.TableFieldSchema.Type.STRUCT) {
89+
String currentScope = scope + BQTableField.getName();
90+
dependenciesList.add(
91+
BQTableSchemaToProtoSchemaImpl(
92+
Table.TableSchema.newBuilder()
93+
.addAllFields(BQTableField.getFieldsList())
94+
.build(),
95+
currentScope)
96+
.getFile());
97+
fields.add(BQStructToProtoMessage(BQTableField, index++, currentScope));
98+
} else {
99+
fields.add(BQTableFieldToProtoField(BQTableField, index++));
100+
}
101+
}
102+
FileDescriptor[] dependenciesArray = new FileDescriptor[dependenciesList.size()];
103+
dependenciesArray = dependenciesList.toArray(dependenciesArray);
104+
DescriptorProto descriptorProto =
105+
DescriptorProto.newBuilder().setName(scope).addAllField(fields).build();
106+
FileDescriptorProto fileDescriptorProto =
107+
FileDescriptorProto.newBuilder().addMessageType(descriptorProto).build();
108+
FileDescriptor fileDescriptor =
109+
FileDescriptor.buildFrom(fileDescriptorProto, dependenciesArray);
110+
Descriptor descriptor = fileDescriptor.findMessageTypeByName(scope);
111+
return descriptor;
112+
}
113+
114+
/**
115+
* Constructs a FieldDescriptorProto for non-struct BQ fields.
116+
*
117+
* @param BQTableField BQ Field used to construct a FieldDescriptorProto
118+
* @param index Index for protobuf fields.
119+
*/
120+
private static FieldDescriptorProto BQTableFieldToProtoField(
121+
Table.TableFieldSchema BQTableField, int index) {
122+
String fieldName = BQTableField.getName();
123+
Table.TableFieldSchema.Mode mode = BQTableField.getMode();
124+
return FieldDescriptorProto.newBuilder()
125+
.setName(fieldName)
126+
.setType((FieldDescriptorProto.Type) BQTableSchemaTypeMap.get(BQTableField.getType()))
127+
.setLabel((FieldDescriptorProto.Label) BQTableSchemaModeMap.get(mode))
128+
.setNumber(index)
129+
.build();
130+
}
131+
132+
/**
133+
* Constructs a FieldDescriptorProto for a Struct type BQ field.
134+
*
135+
* @param BQTableField BQ Field used to construct a FieldDescriptorProto
136+
* @param index Index for protobuf fields.
137+
* @param scope Need scope to prevent naming issues
138+
*/
139+
private static FieldDescriptorProto BQStructToProtoMessage(
140+
Table.TableFieldSchema BQTableField, int index, String scope) {
141+
String fieldName = BQTableField.getName();
142+
Table.TableFieldSchema.Mode mode = BQTableField.getMode();
143+
return FieldDescriptorProto.newBuilder()
144+
.setName(fieldName)
145+
.setTypeName(scope)
146+
.setLabel((FieldDescriptorProto.Label) BQTableSchemaModeMap.get(mode))
147+
.setNumber(index)
148+
.build();
149+
}
150+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.google.cloud.bigquery.storage.v1alpha2;
17+
18+
import static org.junit.Assert.*;
19+
import static org.mockito.Mockito.*;
20+
21+
import com.google.cloud.bigquery.storage.test.SchemaTest.*;
22+
import com.google.common.collect.ImmutableMap;
23+
import com.google.protobuf.Descriptors.Descriptor;
24+
import com.google.protobuf.Descriptors.FieldDescriptor;
25+
import java.util.Map;
26+
import org.junit.Test;
27+
import org.junit.runner.RunWith;
28+
import org.junit.runners.JUnit4;
29+
30+
@RunWith(JUnit4.class)
31+
public class JsonToProtoConverterTest {
32+
private static ImmutableMap<Table.TableFieldSchema.Type, Descriptor>
33+
BQTableTypeToProtoDescriptor =
34+
new ImmutableMap.Builder<Table.TableFieldSchema.Type, Descriptor>()
35+
.put(Table.TableFieldSchema.Type.BOOL, BoolType.getDescriptor())
36+
.put(Table.TableFieldSchema.Type.BYTES, BytesType.getDescriptor())
37+
.put(Table.TableFieldSchema.Type.DATE, Int64Type.getDescriptor())
38+
.put(Table.TableFieldSchema.Type.DATETIME, Int64Type.getDescriptor())
39+
.put(Table.TableFieldSchema.Type.DOUBLE, DoubleType.getDescriptor())
40+
.put(Table.TableFieldSchema.Type.GEOGRAPHY, BytesType.getDescriptor())
41+
.put(Table.TableFieldSchema.Type.INT64, Int64Type.getDescriptor())
42+
.put(Table.TableFieldSchema.Type.NUMERIC, DoubleType.getDescriptor())
43+
.put(Table.TableFieldSchema.Type.STRING, StringType.getDescriptor())
44+
.put(Table.TableFieldSchema.Type.TIME, Int64Type.getDescriptor())
45+
.put(Table.TableFieldSchema.Type.TIMESTAMP, Int64Type.getDescriptor())
46+
.build();
47+
48+
private boolean isDescriptorEqual(Descriptor convertedProto, Descriptor originalProto) {
49+
for (FieldDescriptor convertedField : convertedProto.getFields()) {
50+
FieldDescriptor originalField = originalProto.findFieldByName(convertedField.getName());
51+
if (originalField == null) {
52+
return false;
53+
}
54+
FieldDescriptor.Type convertedType = convertedField.getType();
55+
FieldDescriptor.Type originalType = originalField.getType();
56+
if (convertedType != originalType) {
57+
return false;
58+
}
59+
if (convertedType == FieldDescriptor.Type.MESSAGE) {
60+
if (!isDescriptorEqual(convertedField.getMessageType(), originalField.getMessageType())) {
61+
return false;
62+
}
63+
}
64+
}
65+
return true;
66+
}
67+
68+
@Test
69+
public void testBQTableSchemaToProtoDescriptorSimpleTypes() throws Exception {
70+
for (Map.Entry<Table.TableFieldSchema.Type, Descriptor> entry :
71+
BQTableTypeToProtoDescriptor.entrySet()) {
72+
Table.TableFieldSchema tableFieldSchema =
73+
Table.TableFieldSchema.newBuilder()
74+
.setType(entry.getKey())
75+
.setMode(Table.TableFieldSchema.Mode.NULLABLE)
76+
.setName("test_field_type")
77+
.build();
78+
Table.TableSchema tableSchema =
79+
Table.TableSchema.newBuilder().addFields(0, tableFieldSchema).build();
80+
Descriptor descriptor = JsonToProtoConverter.BQTableSchemaToProtoSchema(tableSchema);
81+
assertTrue(isDescriptorEqual(descriptor, entry.getValue()));
82+
}
83+
}
84+
85+
@Test
86+
public void testBQTableSchemaToProtoDescriptorComplex() throws Exception {
87+
Table.TableFieldSchema StringType =
88+
Table.TableFieldSchema.newBuilder()
89+
.setType(Table.TableFieldSchema.Type.STRING)
90+
.setMode(Table.TableFieldSchema.Mode.NULLABLE)
91+
.setName("test_field_type")
92+
.build();
93+
Table.TableFieldSchema tableFieldSchema =
94+
Table.TableFieldSchema.newBuilder()
95+
.setType(Table.TableFieldSchema.Type.STRUCT)
96+
.setMode(Table.TableFieldSchema.Mode.NULLABLE)
97+
.setName("test_field_type")
98+
.addFields(0, StringType)
99+
.build();
100+
Table.TableSchema tableSchema =
101+
Table.TableSchema.newBuilder().addFields(0, tableFieldSchema).build();
102+
Descriptor descriptor = JsonToProtoConverter.BQTableSchemaToProtoSchema(tableSchema);
103+
assertTrue(isDescriptorEqual(descriptor, MessageType.getDescriptor()));
104+
}
105+
}

0 commit comments

Comments
 (0)