diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index f5b03cbef..b062396cf 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -16,8 +16,9 @@ import collections import enum -from typing import Any, Dict, Iterable, Optional, Union, cast +from typing import Any, cast, Dict, Iterable, Optional, Union +from google.cloud.bigquery import _helpers from google.cloud.bigquery import standard_sql from google.cloud.bigquery.enums import StandardSqlTypeNames @@ -203,15 +204,8 @@ def __init__( self._properties["rangeElementType"] = {"type": range_element_type} if isinstance(range_element_type, FieldElementType): self._properties["rangeElementType"] = range_element_type.to_api_repr() - - self._fields = tuple(fields) - - @staticmethod - def __get_int(api_repr, name): - v = api_repr.get(name, _DEFAULT_VALUE) - if v is not _DEFAULT_VALUE: - v = int(v) - return v + if fields: # Don't set the property if it's not set. + self._properties["fields"] = [field.to_api_repr() for field in fields] @classmethod def from_api_repr(cls, api_repr: dict) -> "SchemaField": @@ -225,43 +219,19 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": Returns: google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object. """ - field_type = api_repr["type"].upper() - - # Handle optional properties with default values - mode = api_repr.get("mode", "NULLABLE") - description = api_repr.get("description", _DEFAULT_VALUE) - fields = api_repr.get("fields", ()) - policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE) + placeholder = cls("this_will_be_replaced", "PLACEHOLDER") - default_value_expression = api_repr.get("defaultValueExpression", None) + # Note: we don't make a copy of api_repr because this can cause + # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD + # fields. See https://github.com/googleapis/python-bigquery/issues/6 + placeholder._properties = api_repr - if policy_tags is not None and policy_tags is not _DEFAULT_VALUE: - policy_tags = PolicyTagList.from_api_repr(policy_tags) - - if api_repr.get("rangeElementType"): - range_element_type = cast(dict, api_repr.get("rangeElementType")) - element_type = range_element_type.get("type") - else: - element_type = None - - return cls( - field_type=field_type, - fields=[cls.from_api_repr(f) for f in fields], - mode=mode.upper(), - default_value_expression=default_value_expression, - description=description, - name=api_repr["name"], - policy_tags=policy_tags, - precision=cls.__get_int(api_repr, "precision"), - scale=cls.__get_int(api_repr, "scale"), - max_length=cls.__get_int(api_repr, "maxLength"), - range_element_type=element_type, - ) + return placeholder @property def name(self): """str: The name of the field.""" - return self._properties["name"] + return self._properties.get("name", "") @property def field_type(self): @@ -270,7 +240,10 @@ def field_type(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type """ - return self._properties["type"] + type_ = self._properties.get("type") + if type_ is None: # Shouldn't happen, but some unit tests do this. + return None + return cast(str, type_).upper() @property def mode(self): @@ -279,7 +252,7 @@ def mode(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode """ - return self._properties.get("mode") + return cast(str, self._properties.get("mode", "NULLABLE")).upper() @property def is_nullable(self): @@ -299,17 +272,17 @@ def description(self): @property def precision(self): """Optional[int]: Precision (number of digits) for the NUMERIC field.""" - return self._properties.get("precision") + return _helpers._int_or_none(self._properties.get("precision")) @property def scale(self): """Optional[int]: Scale (digits after decimal) for the NUMERIC field.""" - return self._properties.get("scale") + return _helpers._int_or_none(self._properties.get("scale")) @property def max_length(self): """Optional[int]: Maximum length for the STRING or BYTES field.""" - return self._properties.get("maxLength") + return _helpers._int_or_none(self._properties.get("maxLength")) @property def range_element_type(self): @@ -329,7 +302,7 @@ def fields(self): Must be empty unset if ``field_type`` is not 'RECORD'. """ - return self._fields + return tuple(_to_schema_fields(self._properties.get("fields", []))) @property def policy_tags(self): @@ -345,15 +318,10 @@ def to_api_repr(self) -> dict: Returns: Dict: A dictionary representing the SchemaField in a serialized form. """ - answer = self._properties.copy() - - # If this is a RECORD type, then sub-fields are also included, - # add this to the serialized representation. - if self.field_type.upper() in _STRUCT_TYPES: - answer["fields"] = [f.to_api_repr() for f in self.fields] - - # Done; return the serialized dictionary. - return answer + # Note: we don't make a copy of _properties because this can cause + # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD + # fields. See https://github.com/googleapis/python-bigquery/issues/6 + return self._properties def _key(self): """A tuple key that uniquely describes this field. @@ -389,7 +357,7 @@ def _key(self): self.mode.upper(), # pytype: disable=attribute-error self.default_value_expression, self.description, - self._fields, + self.fields, policy_tags, ) diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index becf3e959..3a681c476 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import warnings import pytest @@ -571,16 +572,34 @@ def test_schema_setter_valid_mappings_list(self): config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} ) - def test_schema_setter_invalid_mappings_list(self): + def test_schema_setter_allows_unknown_properties(self): config = self._get_target_class()() schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "typeoo": "INTEGER", "mode": "REQUIRED"}, + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "someNewProperty": "test-value", + }, + { + "name": "age", + # Note: This type should be included, too. Avoid client-side + # validation, as it could prevent backwards-compatible + # evolution of the server-side behavior. + "typo": "INTEGER", + "mode": "REQUIRED", + "anotherNewProperty": "another-test", + }, ] - with self.assertRaises(Exception): - config.schema = schema + # Make sure the setter doesn't mutate schema. + expected_schema = copy.deepcopy(schema) + + config.schema = schema + + # _properties should include all fields, including unknown ones. + assert config._properties["load"]["schema"]["fields"] == expected_schema def test_schema_setter_unsetting_schema(self): from google.cloud.bigquery.schema import SchemaField diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index b17cd0281..4b0b28158 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -from google.cloud import bigquery -from google.cloud.bigquery.standard_sql import StandardSqlStructType -from google.cloud.bigquery.schema import PolicyTagList +import copy import unittest from unittest import mock import pytest +from google.cloud import bigquery +from google.cloud.bigquery.standard_sql import StandardSqlStructType +from google.cloud.bigquery.schema import PolicyTagList + class TestSchemaField(unittest.TestCase): @staticmethod @@ -821,13 +823,32 @@ def test_schema_fields_sequence(self): result = self._call_fut(schema) self.assertEqual(result, schema) - def test_invalid_mapping_representation(self): + def test_unknown_properties(self): schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "address", "typeooo": "STRING", "mode": "REQUIRED"}, + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "someNewProperty": "test-value", + }, + { + "name": "age", + # Note: This type should be included, too. Avoid client-side + # validation, as it could prevent backwards-compatible + # evolution of the server-side behavior. + "typo": "INTEGER", + "mode": "REQUIRED", + "anotherNewProperty": "another-test", + }, ] - with self.assertRaises(Exception): - self._call_fut(schema) + + # Make sure the setter doesn't mutate schema. + expected_schema = copy.deepcopy(schema) + + result = self._call_fut(schema) + + for api_repr, field in zip(expected_schema, result): + assert field.to_api_repr() == api_repr def test_valid_mapping_representation(self): from google.cloud.bigquery.schema import SchemaField diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 3824da226..e9d461e9d 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import datetime import logging import re @@ -711,14 +712,35 @@ def test_schema_setter_valid_fields(self): table.schema = [full_name, age] self.assertEqual(table.schema, [full_name, age]) - def test_schema_setter_invalid_mapping_representation(self): + def test_schema_setter_allows_unknown_properties(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - full_name = {"name": "full_name", "type": "STRING", "mode": "REQUIRED"} - invalid_field = {"name": "full_name", "typeooo": "STRING", "mode": "REQUIRED"} - with self.assertRaises(Exception): - table.schema = [full_name, invalid_field] + schema = [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "someNewProperty": "test-value", + }, + { + "name": "age", + # Note: This type should be included, too. Avoid client-side + # validation, as it could prevent backwards-compatible + # evolution of the server-side behavior. + "typo": "INTEGER", + "mode": "REQUIRED", + "anotherNewProperty": "another-test", + }, + ] + + # Make sure the setter doesn't mutate schema. + expected_schema = copy.deepcopy(schema) + + table.schema = schema + + # _properties should include all fields, including unknown ones. + assert table._properties["schema"]["fields"] == expected_schema def test_schema_setter_valid_mapping_representation(self): from google.cloud.bigquery.schema import SchemaField