Skip to content

Commit e3f57a6

Browse files
tswastgcf-owl-bot[bot]
authored andcommitted
feat: preserve unknown fields from the REST API representation in SchemaField (#2097)
* feat: preserve unknown fields from the REST API representaton in `SchemaField` * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * remove unnecessary variable * remove unused private method * fix pytype --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 561f05f commit e3f57a6

File tree

4 files changed

+116
-72
lines changed

4 files changed

+116
-72
lines changed

google/cloud/bigquery/schema.py

+29-61
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@
2020

2121
import copy
2222
import enum
23-
from typing import Any, Dict, Iterable, Optional, Union, cast, List, Mapping
23+
from typing import Any, cast, Dict, Iterable, Optional, Union
2424

25+
from google.cloud.bigquery import _helpers
2526
from google.cloud.bigquery import standard_sql
2627
from google.cloud.bigquery._helpers import (
2728
_isinstance_or_raise,
@@ -218,7 +219,10 @@ def __init__(
218219
rounding_mode: Union[RoundingMode, str, None] = None,
219220
foreign_type_definition: Optional[str] = None,
220221
):
221-
self._properties: Dict[str, Any] = {}
222+
self._properties: Dict[str, Any] = {
223+
"name": name,
224+
"type": field_type,
225+
}
222226

223227
self._properties["name"] = name
224228
if mode is not None:
@@ -259,14 +263,9 @@ def __init__(
259263
)
260264
self._properties["type"] = field_type
261265

262-
self._fields = tuple(fields)
266+
if fields: # Don't set the property if it's not set.
267+
self._properties["fields"] = [field.to_api_repr() for field in fields]
263268

264-
@staticmethod
265-
def __get_int(api_repr, name):
266-
v = api_repr.get(name, _DEFAULT_VALUE)
267-
if v is not _DEFAULT_VALUE:
268-
v = int(v)
269-
return v
270269

271270
@classmethod
272271
def from_api_repr(cls, api_repr: Mapping[str, Any]) -> "SchemaField":
@@ -280,48 +279,19 @@ def from_api_repr(cls, api_repr: Mapping[str, Any]) -> "SchemaField":
280279
Returns:
281280
google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object.
282281
"""
283-
field_type = api_repr["type"].upper()
284-
285-
# Handle optional properties with default values
286-
mode = api_repr.get("mode", "NULLABLE")
287-
description = api_repr.get("description", _DEFAULT_VALUE)
288-
fields = api_repr.get("fields", ())
289-
policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE)
282+
placeholder = cls("this_will_be_replaced", "PLACEHOLDER")
290283

291-
default_value_expression = api_repr.get("defaultValueExpression", None)
284+
# Note: we don't make a copy of api_repr because this can cause
285+
# unnecessary slowdowns, especially on deeply nested STRUCT / RECORD
286+
# fields. See https://github.com/googleapis/python-bigquery/issues/6
287+
placeholder._properties = api_repr
292288

293-
if policy_tags is not None and policy_tags is not _DEFAULT_VALUE:
294-
policy_tags = PolicyTagList.from_api_repr(policy_tags)
295-
296-
if api_repr.get("rangeElementType"):
297-
range_element_type = cast(dict, api_repr.get("rangeElementType"))
298-
element_type = range_element_type.get("type")
299-
else:
300-
element_type = None
301-
302-
rounding_mode = api_repr.get("roundingMode")
303-
foreign_type_definition = api_repr.get("foreignTypeDefinition")
304-
305-
return cls(
306-
field_type=field_type,
307-
fields=[cls.from_api_repr(f) for f in fields],
308-
mode=mode.upper(),
309-
default_value_expression=default_value_expression,
310-
description=description,
311-
name=api_repr["name"],
312-
policy_tags=policy_tags,
313-
precision=cls.__get_int(api_repr, "precision"),
314-
scale=cls.__get_int(api_repr, "scale"),
315-
max_length=cls.__get_int(api_repr, "maxLength"),
316-
range_element_type=element_type,
317-
rounding_mode=rounding_mode,
318-
foreign_type_definition=foreign_type_definition,
319-
)
289+
return placeholder
320290

321291
@property
322292
def name(self):
323293
"""str: The name of the field."""
324-
return self._properties["name"]
294+
return self._properties.get("name", "")
325295

326296
@property
327297
def field_type(self):
@@ -330,7 +300,10 @@ def field_type(self):
330300
See:
331301
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type
332302
"""
333-
return self._properties["type"]
303+
type_ = self._properties.get("type")
304+
if type_ is None: # Shouldn't happen, but some unit tests do this.
305+
return None
306+
return cast(str, type_).upper()
334307

335308
@property
336309
def mode(self):
@@ -339,7 +312,7 @@ def mode(self):
339312
See:
340313
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode
341314
"""
342-
return self._properties.get("mode")
315+
return cast(str, self._properties.get("mode", "NULLABLE")).upper()
343316

344317
@property
345318
def is_nullable(self):
@@ -359,17 +332,17 @@ def description(self):
359332
@property
360333
def precision(self):
361334
"""Optional[int]: Precision (number of digits) for the NUMERIC field."""
362-
return self._properties.get("precision")
335+
return _helpers._int_or_none(self._properties.get("precision"))
363336

364337
@property
365338
def scale(self):
366339
"""Optional[int]: Scale (digits after decimal) for the NUMERIC field."""
367-
return self._properties.get("scale")
340+
return _helpers._int_or_none(self._properties.get("scale"))
368341

369342
@property
370343
def max_length(self):
371344
"""Optional[int]: Maximum length for the STRING or BYTES field."""
372-
return self._properties.get("maxLength")
345+
return _helpers._int_or_none(self._properties.get("maxLength"))
373346

374347
@property
375348
def range_element_type(self):
@@ -405,7 +378,7 @@ def fields(self):
405378
406379
Must be empty unset if ``field_type`` is not 'RECORD'.
407380
"""
408-
return self._fields
381+
return tuple(_to_schema_fields(self._properties.get("fields", [])))
409382

410383
@property
411384
def policy_tags(self):
@@ -421,15 +394,10 @@ def to_api_repr(self) -> dict:
421394
Returns:
422395
Dict: A dictionary representing the SchemaField in a serialized form.
423396
"""
424-
answer = self._properties.copy()
425-
426-
# If this is a RECORD type, then sub-fields are also included,
427-
# add this to the serialized representation.
428-
if self.field_type.upper() in _STRUCT_TYPES:
429-
answer["fields"] = [f.to_api_repr() for f in self.fields]
430-
431-
# Done; return the serialized dictionary.
432-
return answer
397+
# Note: we don't make a copy of _properties because this can cause
398+
# unnecessary slowdowns, especially on deeply nested STRUCT / RECORD
399+
# fields. See https://github.com/googleapis/python-bigquery/issues/6
400+
return self._properties
433401

434402
def _key(self):
435403
"""A tuple key that uniquely describes this field.
@@ -465,7 +433,7 @@ def _key(self):
465433
self.mode.upper(), # pytype: disable=attribute-error
466434
self.default_value_expression,
467435
self.description,
468-
self._fields,
436+
self.fields,
469437
policy_tags,
470438
)
471439

tests/unit/job/test_load_config.py

+24-5
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import copy
1516
import warnings
1617

1718
import pytest
@@ -571,16 +572,34 @@ def test_schema_setter_valid_mappings_list(self):
571572
config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]}
572573
)
573574

574-
def test_schema_setter_invalid_mappings_list(self):
575+
def test_schema_setter_allows_unknown_properties(self):
575576
config = self._get_target_class()()
576577

577578
schema = [
578-
{"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
579-
{"name": "age", "typeoo": "INTEGER", "mode": "REQUIRED"},
579+
{
580+
"name": "full_name",
581+
"type": "STRING",
582+
"mode": "REQUIRED",
583+
"someNewProperty": "test-value",
584+
},
585+
{
586+
"name": "age",
587+
# Note: This type should be included, too. Avoid client-side
588+
# validation, as it could prevent backwards-compatible
589+
# evolution of the server-side behavior.
590+
"typo": "INTEGER",
591+
"mode": "REQUIRED",
592+
"anotherNewProperty": "another-test",
593+
},
580594
]
581595

582-
with self.assertRaises(Exception):
583-
config.schema = schema
596+
# Make sure the setter doesn't mutate schema.
597+
expected_schema = copy.deepcopy(schema)
598+
599+
config.schema = schema
600+
601+
# _properties should include all fields, including unknown ones.
602+
assert config._properties["load"]["schema"]["fields"] == expected_schema
584603

585604
def test_schema_setter_unsetting_schema(self):
586605
from google.cloud.bigquery.schema import SchemaField

tests/unit/test_schema.py

+36-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
<<<<<<< HEAD
1516
from google.cloud import bigquery
1617
from google.cloud.bigquery.enums import RoundingMode
1718
from google.cloud.bigquery.standard_sql import StandardSqlStructType
@@ -27,11 +28,18 @@
2728
_to_schema_fields,
2829
)
2930

31+
=======
32+
import copy
33+
>>>>>>> aaf1eb85 (feat: preserve unknown fields from the REST API representation in `SchemaField` (#2097))
3034
import unittest
3135
from unittest import mock
3236

3337
import pytest
3438

39+
from google.cloud import bigquery
40+
from google.cloud.bigquery.standard_sql import StandardSqlStructType
41+
from google.cloud.bigquery.schema import PolicyTagList
42+
3543

3644
class TestSchemaField(unittest.TestCase):
3745
@staticmethod
@@ -843,13 +851,40 @@ def test_schema_fields_sequence(self):
843851
result = _to_schema_fields(schema)
844852
assert result == schema
845853

846-
def test_invalid_mapping_representation(self):
854+
def test_unknown_properties(self):
847855
schema = [
856+
<<<<<<< HEAD
848857
{"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
849858
{"name": "address", "invalid_key": "STRING", "mode": "REQUIRED"},
850859
]
851860
with pytest.raises(Exception): # Or a more specific exception if known
852861
_to_schema_fields(schema)
862+
=======
863+
{
864+
"name": "full_name",
865+
"type": "STRING",
866+
"mode": "REQUIRED",
867+
"someNewProperty": "test-value",
868+
},
869+
{
870+
"name": "age",
871+
# Note: This type should be included, too. Avoid client-side
872+
# validation, as it could prevent backwards-compatible
873+
# evolution of the server-side behavior.
874+
"typo": "INTEGER",
875+
"mode": "REQUIRED",
876+
"anotherNewProperty": "another-test",
877+
},
878+
]
879+
880+
# Make sure the setter doesn't mutate schema.
881+
expected_schema = copy.deepcopy(schema)
882+
883+
result = self._call_fut(schema)
884+
885+
for api_repr, field in zip(expected_schema, result):
886+
assert field.to_api_repr() == api_repr
887+
>>>>>>> aaf1eb85 (feat: preserve unknown fields from the REST API representation in `SchemaField` (#2097))
853888

854889
@pytest.mark.parametrize(
855890
"schema, expected_schema",

tests/unit/test_table.py

+27-5
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import copy
1516
import datetime
1617
import logging
1718
import re
@@ -719,14 +720,35 @@ def test_schema_setter_valid_fields(self):
719720
table.schema = [full_name, age]
720721
self.assertEqual(table.schema, [full_name, age])
721722

722-
def test_schema_setter_invalid_mapping_representation(self):
723+
def test_schema_setter_allows_unknown_properties(self):
723724
dataset = DatasetReference(self.PROJECT, self.DS_ID)
724725
table_ref = dataset.table(self.TABLE_NAME)
725726
table = self._make_one(table_ref)
726-
full_name = {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}
727-
invalid_field = {"name": "full_name", "typeooo": "STRING", "mode": "REQUIRED"}
728-
with self.assertRaises(Exception):
729-
table.schema = [full_name, invalid_field]
727+
schema = [
728+
{
729+
"name": "full_name",
730+
"type": "STRING",
731+
"mode": "REQUIRED",
732+
"someNewProperty": "test-value",
733+
},
734+
{
735+
"name": "age",
736+
# Note: This type should be included, too. Avoid client-side
737+
# validation, as it could prevent backwards-compatible
738+
# evolution of the server-side behavior.
739+
"typo": "INTEGER",
740+
"mode": "REQUIRED",
741+
"anotherNewProperty": "another-test",
742+
},
743+
]
744+
745+
# Make sure the setter doesn't mutate schema.
746+
expected_schema = copy.deepcopy(schema)
747+
748+
table.schema = schema
749+
750+
# _properties should include all fields, including unknown ones.
751+
assert table._properties["schema"]["fields"] == expected_schema
730752

731753
def test_schema_setter_valid_mapping_representation(self):
732754
from google.cloud.bigquery.schema import SchemaField

0 commit comments

Comments
 (0)