Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adds the SerDeInfo class and tests #2108

Merged
merged 2 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions google/cloud/bigquery/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,16 @@

"""Schemas for BigQuery tables / queries."""

from __future__ import annotations
import collections
import enum
from typing import Any, cast, Dict, Iterable, Optional, Union

from google.cloud.bigquery import _helpers
from google.cloud.bigquery import standard_sql
from google.cloud.bigquery._helpers import (
_isinstance_or_raise,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Use _helpers._isinstance_or_raise in the code instead.

Use import statements for packages and modules only, not for individual types, classes, or functions.

https://google.github.io/styleguide/pyguide.html#22-imports

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is great feedback. I had not realized that.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Resolved.

)
from google.cloud.bigquery.enums import StandardSqlTypeNames


Expand Down Expand Up @@ -556,3 +560,88 @@ def to_api_repr(self) -> dict:
"""
answer = {"names": list(self.names)}
return answer


class SerDeInfo:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the name in the REST API? I'd prefer the full names rather than abbreviations, but I'm OK with this if it's to be consistent with the proto/REST API.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SerDeInfo is the name in the proto/REST API definitions.

"""Serializer and deserializer information.
Args:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might need a blank line here for the docs renderer to work?

Suggested change
"""Serializer and deserializer information.
Args:
"""Serializer and deserializer information.
Args:

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Resolved.

serializationLibrary (str): Required. Specifies a fully-qualified class
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
serializationLibrary (str): Required. Specifies a fully-qualified class
serialization_library (str): Required. Specifies a fully-qualified class

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 to transforming to PEP-8 compliant snake_case

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Resolved.

name of the serialization library that is responsible for the
translation of data between table representation and the underlying
low-level input and output format structures. The maximum length is
256 characters.
name (Optional[str]): Name of the SerDe. The maximum length is 256
characters.
parameters: (Optional[dict[str, str]]): Key-value pairs that define the initialization
parameters for the serialization library. Maximum size 10 Kib.
"""

def __init__(
self,
serialization_library: str,
name: Optional[str] = None,
parameters: Optional[dict[str, str]] = None,
):
self._properties: Dict[str, Any] = {}
self.serialization_library = serialization_library
self.name = name
self.parameters = parameters

@property
def serialization_library(self) -> Any:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def serialization_library(self) -> Any:
def serialization_library(self) -> str:

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Resolved.

"""Required. Specifies a fully-qualified class name of the serialization
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I think Required. should only apply to the init.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As mentioned in chat, I prefer to leave this as is.

library that is responsible for the translation of data between table
representation and the underlying low-level input and output format
structures. The maximum length is 256 characters."""

return self._properties.get("serializationLibrary")

@serialization_library.setter
def serialization_library(self, value: str):
value = _isinstance_or_raise(value, str, none_allowed=False)
self._properties["serializationLibrary"] = value

@property
def name(self) -> Any:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def name(self) -> Any:
def name(self) -> Optional[str]:

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Resolved.

"""Optional. Name of the SerDe. The maximum length is 256 characters."""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: similarly, we can remove Optional. here.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As mentioned in chat, I prefer to leave this as is.


return self._properties.get("name")

@name.setter
def name(self, value: Optional[str] = None):
value = _isinstance_or_raise(value, str, none_allowed=True)
self._properties["name"] = value

@property
def parameters(self) -> Any:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def parameters(self) -> Any:
def parameters(self) -> Optional[dict[str, str]]:

For consistent type annotations.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Resolved.

"""Optional. Key-value pairs that define the initialization parameters
for the serialization library. Maximum size 10 Kib."""

return self._properties.get("parameters")

@parameters.setter
def parameters(self, value: Optional[dict[str, str]] = None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See reply above from Python Docs about the concept of optional versus typing.Optional.

value = _isinstance_or_raise(value, dict, none_allowed=True)
self._properties["parameters"] = value

def to_api_repr(self) -> dict:
"""Build an API representation of this object.
Returns:
Dict[str, Any]:
A dictionary in the format used by the BigQuery API.
"""
return self._properties

@classmethod
def from_api_repr(cls, api_repr: dict) -> SerDeInfo:
"""Factory: constructs an instance of the class (cls)
given its API representation.
Args:
resource (Dict[str, Any]):
API representation of the object to be instantiated.
Returns:
An instance of the class initialized with data from 'resource'.
"""
config = cls("PLACEHOLDER")
config._properties = api_repr
return config
92 changes: 91 additions & 1 deletion tests/unit/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@

from google.cloud import bigquery
from google.cloud.bigquery.standard_sql import StandardSqlStructType
from google.cloud.bigquery.schema import PolicyTagList
from google.cloud.bigquery.schema import (
PolicyTagList,
SerDeInfo,
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator Author

@chalmerlowe chalmerlowe Jan 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm. I attempted to correct this for both PolicyTagList and SerDeInfo.
Unfortunately, there is something about how PolicyTagList is handled in our code that did not like the change and it produced several failing tests. I did not want to clutter this PR with unrelated changes, nor did I want to delay the issuance of this PR.

So I resolved this in the following way:

  • I corrected SerDeInfo and throughout the code we reference schema.SerDeInfo.
  • I left the PolicyTagList code exactly as it was found.



class TestSchemaField(unittest.TestCase):
Expand Down Expand Up @@ -1129,3 +1132,90 @@ def test_to_api_repr_parameterized(field, api):
from google.cloud.bigquery.schema import SchemaField

assert SchemaField(**field).to_api_repr() == api


class TestSerDeInfo:
"""Tests for the SerDeInfo class."""

@staticmethod
def _get_target_class():
return SerDeInfo

def _make_one(self, *args, **kwargs):
return self._get_target_class()(*args, **kwargs)

@pytest.mark.parametrize(
"serialization_library,name,parameters",
[
("testpath.to.LazySimpleSerDe", None, None),
("testpath.to.LazySimpleSerDe", "serde_name", None),
("testpath.to.LazySimpleSerDe", None, {"key": "value"}),
("testpath.to.LazySimpleSerDe", "serde_name", {"key": "value"}),
],
)
def test_ctor_valid_input(self, serialization_library, name, parameters):
serde_info = self._make_one(
serialization_library=serialization_library,
name=name,
parameters=parameters,
)
assert serde_info.serialization_library == serialization_library
assert serde_info.name == name
assert serde_info.parameters == parameters

@pytest.mark.parametrize(
"serialization_library,name,parameters",
[
(123, None, None),
("testpath.to.LazySimpleSerDe", 123, None),
("testpath.to.LazySimpleSerDe", None, ["test", "list"]),
("testpath.to.LazySimpleSerDe", None, 123),
],
)
def test_ctor_invalid_input(self, serialization_library, name, parameters):
with pytest.raises(TypeError) as e:
self._make_one(
serialization_library=serialization_library,
name=name,
parameters=parameters,
)
# Looking for the first word from the string "Pass <variable> as..."
assert "Pass " in str(e.value)

def test_to_api_repr(self):
serde_info = self._make_one(
serialization_library="testpath.to.LazySimpleSerDe",
name="serde_name",
parameters={"key": "value"},
)
expected_repr = {
"serializationLibrary": "testpath.to.LazySimpleSerDe",
"name": "serde_name",
"parameters": {"key": "value"},
}
assert serde_info.to_api_repr() == expected_repr

def test_from_api_repr(self):
"""GIVEN an api representation of a SerDeInfo object (i.e. resource)
WHEN converted into a SerDeInfo object using from_api_repr()
THEN it will have the representation in dict format as a SerDeInfo
object made directly (via _make_one()) and represented in dict format.
"""
api_repr = {
"serializationLibrary": "testpath.to.LazySimpleSerDe",
"name": "serde_name",
"parameters": {"key": "value"},
}

expected = self._make_one(
serialization_library="testpath.to.LazySimpleSerDe",
name="serde_name",
parameters={"key": "value"},
)

klass = self._get_target_class()
result = klass.from_api_repr(api_repr)

# We convert both to dict format because these classes do not have a
# __eq__() method to facilitate direct equality comparisons.
assert result.to_api_repr() == expected.to_api_repr()
Loading