Skip to content

Commit aaf1eb8

Browse files
feat: preserve unknown fields from the REST API representation in SchemaField (#2097)
* feat: preserve unknown fields from the REST API representaton in `SchemaField` * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * remove unnecessary variable * remove unused private method * fix pytype --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 887e126 commit aaf1eb8

File tree

4 files changed

+105
-75
lines changed

4 files changed

+105
-75
lines changed

google/cloud/bigquery/schema.py

+25-57
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@
1616

1717
import collections
1818
import enum
19-
from typing import Any, Dict, Iterable, Optional, Union, cast
19+
from typing import Any, cast, Dict, Iterable, Optional, Union
2020

21+
from google.cloud.bigquery import _helpers
2122
from google.cloud.bigquery import standard_sql
2223
from google.cloud.bigquery.enums import StandardSqlTypeNames
2324

@@ -203,15 +204,8 @@ def __init__(
203204
self._properties["rangeElementType"] = {"type": range_element_type}
204205
if isinstance(range_element_type, FieldElementType):
205206
self._properties["rangeElementType"] = range_element_type.to_api_repr()
206-
207-
self._fields = tuple(fields)
208-
209-
@staticmethod
210-
def __get_int(api_repr, name):
211-
v = api_repr.get(name, _DEFAULT_VALUE)
212-
if v is not _DEFAULT_VALUE:
213-
v = int(v)
214-
return v
207+
if fields: # Don't set the property if it's not set.
208+
self._properties["fields"] = [field.to_api_repr() for field in fields]
215209

216210
@classmethod
217211
def from_api_repr(cls, api_repr: dict) -> "SchemaField":
@@ -225,43 +219,19 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
225219
Returns:
226220
google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object.
227221
"""
228-
field_type = api_repr["type"].upper()
229-
230-
# Handle optional properties with default values
231-
mode = api_repr.get("mode", "NULLABLE")
232-
description = api_repr.get("description", _DEFAULT_VALUE)
233-
fields = api_repr.get("fields", ())
234-
policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE)
222+
placeholder = cls("this_will_be_replaced", "PLACEHOLDER")
235223

236-
default_value_expression = api_repr.get("defaultValueExpression", None)
224+
# Note: we don't make a copy of api_repr because this can cause
225+
# unnecessary slowdowns, especially on deeply nested STRUCT / RECORD
226+
# fields. See https://github.com/googleapis/python-bigquery/issues/6
227+
placeholder._properties = api_repr
237228

238-
if policy_tags is not None and policy_tags is not _DEFAULT_VALUE:
239-
policy_tags = PolicyTagList.from_api_repr(policy_tags)
240-
241-
if api_repr.get("rangeElementType"):
242-
range_element_type = cast(dict, api_repr.get("rangeElementType"))
243-
element_type = range_element_type.get("type")
244-
else:
245-
element_type = None
246-
247-
return cls(
248-
field_type=field_type,
249-
fields=[cls.from_api_repr(f) for f in fields],
250-
mode=mode.upper(),
251-
default_value_expression=default_value_expression,
252-
description=description,
253-
name=api_repr["name"],
254-
policy_tags=policy_tags,
255-
precision=cls.__get_int(api_repr, "precision"),
256-
scale=cls.__get_int(api_repr, "scale"),
257-
max_length=cls.__get_int(api_repr, "maxLength"),
258-
range_element_type=element_type,
259-
)
229+
return placeholder
260230

261231
@property
262232
def name(self):
263233
"""str: The name of the field."""
264-
return self._properties["name"]
234+
return self._properties.get("name", "")
265235

266236
@property
267237
def field_type(self):
@@ -270,7 +240,10 @@ def field_type(self):
270240
See:
271241
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type
272242
"""
273-
return self._properties["type"]
243+
type_ = self._properties.get("type")
244+
if type_ is None: # Shouldn't happen, but some unit tests do this.
245+
return None
246+
return cast(str, type_).upper()
274247

275248
@property
276249
def mode(self):
@@ -279,7 +252,7 @@ def mode(self):
279252
See:
280253
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode
281254
"""
282-
return self._properties.get("mode")
255+
return cast(str, self._properties.get("mode", "NULLABLE")).upper()
283256

284257
@property
285258
def is_nullable(self):
@@ -299,17 +272,17 @@ def description(self):
299272
@property
300273
def precision(self):
301274
"""Optional[int]: Precision (number of digits) for the NUMERIC field."""
302-
return self._properties.get("precision")
275+
return _helpers._int_or_none(self._properties.get("precision"))
303276

304277
@property
305278
def scale(self):
306279
"""Optional[int]: Scale (digits after decimal) for the NUMERIC field."""
307-
return self._properties.get("scale")
280+
return _helpers._int_or_none(self._properties.get("scale"))
308281

309282
@property
310283
def max_length(self):
311284
"""Optional[int]: Maximum length for the STRING or BYTES field."""
312-
return self._properties.get("maxLength")
285+
return _helpers._int_or_none(self._properties.get("maxLength"))
313286

314287
@property
315288
def range_element_type(self):
@@ -329,7 +302,7 @@ def fields(self):
329302
330303
Must be empty unset if ``field_type`` is not 'RECORD'.
331304
"""
332-
return self._fields
305+
return tuple(_to_schema_fields(self._properties.get("fields", [])))
333306

334307
@property
335308
def policy_tags(self):
@@ -345,15 +318,10 @@ def to_api_repr(self) -> dict:
345318
Returns:
346319
Dict: A dictionary representing the SchemaField in a serialized form.
347320
"""
348-
answer = self._properties.copy()
349-
350-
# If this is a RECORD type, then sub-fields are also included,
351-
# add this to the serialized representation.
352-
if self.field_type.upper() in _STRUCT_TYPES:
353-
answer["fields"] = [f.to_api_repr() for f in self.fields]
354-
355-
# Done; return the serialized dictionary.
356-
return answer
321+
# Note: we don't make a copy of _properties because this can cause
322+
# unnecessary slowdowns, especially on deeply nested STRUCT / RECORD
323+
# fields. See https://github.com/googleapis/python-bigquery/issues/6
324+
return self._properties
357325

358326
def _key(self):
359327
"""A tuple key that uniquely describes this field.
@@ -389,7 +357,7 @@ def _key(self):
389357
self.mode.upper(), # pytype: disable=attribute-error
390358
self.default_value_expression,
391359
self.description,
392-
self._fields,
360+
self.fields,
393361
policy_tags,
394362
)
395363

tests/unit/job/test_load_config.py

+24-5
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import copy
1516
import warnings
1617

1718
import pytest
@@ -571,16 +572,34 @@ def test_schema_setter_valid_mappings_list(self):
571572
config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]}
572573
)
573574

574-
def test_schema_setter_invalid_mappings_list(self):
575+
def test_schema_setter_allows_unknown_properties(self):
575576
config = self._get_target_class()()
576577

577578
schema = [
578-
{"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
579-
{"name": "age", "typeoo": "INTEGER", "mode": "REQUIRED"},
579+
{
580+
"name": "full_name",
581+
"type": "STRING",
582+
"mode": "REQUIRED",
583+
"someNewProperty": "test-value",
584+
},
585+
{
586+
"name": "age",
587+
# Note: This type should be included, too. Avoid client-side
588+
# validation, as it could prevent backwards-compatible
589+
# evolution of the server-side behavior.
590+
"typo": "INTEGER",
591+
"mode": "REQUIRED",
592+
"anotherNewProperty": "another-test",
593+
},
580594
]
581595

582-
with self.assertRaises(Exception):
583-
config.schema = schema
596+
# Make sure the setter doesn't mutate schema.
597+
expected_schema = copy.deepcopy(schema)
598+
599+
config.schema = schema
600+
601+
# _properties should include all fields, including unknown ones.
602+
assert config._properties["load"]["schema"]["fields"] == expected_schema
584603

585604
def test_schema_setter_unsetting_schema(self):
586605
from google.cloud.bigquery.schema import SchemaField

tests/unit/test_schema.py

+29-8
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,16 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from google.cloud import bigquery
16-
from google.cloud.bigquery.standard_sql import StandardSqlStructType
17-
from google.cloud.bigquery.schema import PolicyTagList
15+
import copy
1816
import unittest
1917
from unittest import mock
2018

2119
import pytest
2220

21+
from google.cloud import bigquery
22+
from google.cloud.bigquery.standard_sql import StandardSqlStructType
23+
from google.cloud.bigquery.schema import PolicyTagList
24+
2325

2426
class TestSchemaField(unittest.TestCase):
2527
@staticmethod
@@ -821,13 +823,32 @@ def test_schema_fields_sequence(self):
821823
result = self._call_fut(schema)
822824
self.assertEqual(result, schema)
823825

824-
def test_invalid_mapping_representation(self):
826+
def test_unknown_properties(self):
825827
schema = [
826-
{"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
827-
{"name": "address", "typeooo": "STRING", "mode": "REQUIRED"},
828+
{
829+
"name": "full_name",
830+
"type": "STRING",
831+
"mode": "REQUIRED",
832+
"someNewProperty": "test-value",
833+
},
834+
{
835+
"name": "age",
836+
# Note: This type should be included, too. Avoid client-side
837+
# validation, as it could prevent backwards-compatible
838+
# evolution of the server-side behavior.
839+
"typo": "INTEGER",
840+
"mode": "REQUIRED",
841+
"anotherNewProperty": "another-test",
842+
},
828843
]
829-
with self.assertRaises(Exception):
830-
self._call_fut(schema)
844+
845+
# Make sure the setter doesn't mutate schema.
846+
expected_schema = copy.deepcopy(schema)
847+
848+
result = self._call_fut(schema)
849+
850+
for api_repr, field in zip(expected_schema, result):
851+
assert field.to_api_repr() == api_repr
831852

832853
def test_valid_mapping_representation(self):
833854
from google.cloud.bigquery.schema import SchemaField

tests/unit/test_table.py

+27-5
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import copy
1516
import datetime
1617
import logging
1718
import re
@@ -711,14 +712,35 @@ def test_schema_setter_valid_fields(self):
711712
table.schema = [full_name, age]
712713
self.assertEqual(table.schema, [full_name, age])
713714

714-
def test_schema_setter_invalid_mapping_representation(self):
715+
def test_schema_setter_allows_unknown_properties(self):
715716
dataset = DatasetReference(self.PROJECT, self.DS_ID)
716717
table_ref = dataset.table(self.TABLE_NAME)
717718
table = self._make_one(table_ref)
718-
full_name = {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}
719-
invalid_field = {"name": "full_name", "typeooo": "STRING", "mode": "REQUIRED"}
720-
with self.assertRaises(Exception):
721-
table.schema = [full_name, invalid_field]
719+
schema = [
720+
{
721+
"name": "full_name",
722+
"type": "STRING",
723+
"mode": "REQUIRED",
724+
"someNewProperty": "test-value",
725+
},
726+
{
727+
"name": "age",
728+
# Note: This type should be included, too. Avoid client-side
729+
# validation, as it could prevent backwards-compatible
730+
# evolution of the server-side behavior.
731+
"typo": "INTEGER",
732+
"mode": "REQUIRED",
733+
"anotherNewProperty": "another-test",
734+
},
735+
]
736+
737+
# Make sure the setter doesn't mutate schema.
738+
expected_schema = copy.deepcopy(schema)
739+
740+
table.schema = schema
741+
742+
# _properties should include all fields, including unknown ones.
743+
assert table._properties["schema"]["fields"] == expected_schema
722744

723745
def test_schema_setter_valid_mapping_representation(self):
724746
from google.cloud.bigquery.schema import SchemaField

0 commit comments

Comments
 (0)