Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adds the SerDeInfo class and tests #2108

Merged
merged 2 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions google/cloud/bigquery/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@

"""Schemas for BigQuery tables / queries."""

from __future__ import annotations
import collections
import enum
import typing
from typing import Any, cast, Dict, Iterable, Optional, Union

from google.cloud.bigquery import _helpers
Expand Down Expand Up @@ -556,3 +558,89 @@ def to_api_repr(self) -> dict:
"""
answer = {"names": list(self.names)}
return answer


class SerDeInfo:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the name in the REST API? I'd prefer the full names rather than abbreviations, but I'm OK with this if it's to be consistent with the proto/REST API.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SerDeInfo is the name in the proto/REST API definitions.

"""Serializer and deserializer information.

Args:
serialization_library (str): Required. Specifies a fully-qualified class
name of the serialization library that is responsible for the
translation of data between table representation and the underlying
low-level input and output format structures. The maximum length is
256 characters.
name (Optional[str]): Name of the SerDe. The maximum length is 256
characters.
parameters: (Optional[dict[str, str]]): Key-value pairs that define the initialization
parameters for the serialization library. Maximum size 10 Kib.
"""

def __init__(
self,
serialization_library: str,
name: Optional[str] = None,
parameters: Optional[dict[str, str]] = None,
):
self._properties: Dict[str, Any] = {}
self.serialization_library = serialization_library
self.name = name
self.parameters = parameters

@property
def serialization_library(self) -> str:
"""Required. Specifies a fully-qualified class name of the serialization
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I think Required. should only apply to the init.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As mentioned in chat, I prefer to leave this as is.

library that is responsible for the translation of data between table
representation and the underlying low-level input and output format
structures. The maximum length is 256 characters."""

return typing.cast(str, self._properties.get("serializationLibrary"))

@serialization_library.setter
def serialization_library(self, value: str):
value = _helpers._isinstance_or_raise(value, str, none_allowed=False)
self._properties["serializationLibrary"] = value

@property
def name(self) -> Optional[str]:
"""Optional. Name of the SerDe. The maximum length is 256 characters."""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: similarly, we can remove Optional. here.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As mentioned in chat, I prefer to leave this as is.


return self._properties.get("name")

@name.setter
def name(self, value: Optional[str] = None):
value = _helpers._isinstance_or_raise(value, str, none_allowed=True)
self._properties["name"] = value

@property
def parameters(self) -> Optional[dict[str, str]]:
"""Optional. Key-value pairs that define the initialization parameters
for the serialization library. Maximum size 10 Kib."""

return self._properties.get("parameters")

@parameters.setter
def parameters(self, value: Optional[dict[str, str]] = None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See reply above from Python Docs about the concept of optional versus typing.Optional.

value = _helpers._isinstance_or_raise(value, dict, none_allowed=True)
self._properties["parameters"] = value

def to_api_repr(self) -> dict:
"""Build an API representation of this object.
Returns:
Dict[str, Any]:
A dictionary in the format used by the BigQuery API.
"""
return self._properties

@classmethod
def from_api_repr(cls, api_repr: dict) -> SerDeInfo:
"""Factory: constructs an instance of the class (cls)
given its API representation.
Args:
resource (Dict[str, Any]):
API representation of the object to be instantiated.
Returns:
An instance of the class initialized with data from 'resource'.
"""
config = cls("PLACEHOLDER")
config._properties = api_repr
return config
92 changes: 88 additions & 4 deletions tests/unit/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from google.cloud import bigquery
from google.cloud.bigquery.standard_sql import StandardSqlStructType
from google.cloud.bigquery import schema
from google.cloud.bigquery.schema import PolicyTagList


Expand Down Expand Up @@ -130,8 +131,6 @@ def test_constructor_range_str(self):
self.assertEqual(field.range_element_type.element_type, "DATETIME")

def test_to_api_repr(self):
from google.cloud.bigquery.schema import PolicyTagList

policy = PolicyTagList(names=("foo", "bar"))
self.assertEqual(
policy.to_api_repr(),
Expand Down Expand Up @@ -886,8 +885,6 @@ def test_valid_mapping_representation(self):
class TestPolicyTags(unittest.TestCase):
@staticmethod
def _get_target_class():
from google.cloud.bigquery.schema import PolicyTagList

return PolicyTagList

def _make_one(self, *args, **kw):
Expand Down Expand Up @@ -1129,3 +1126,90 @@ def test_to_api_repr_parameterized(field, api):
from google.cloud.bigquery.schema import SchemaField

assert SchemaField(**field).to_api_repr() == api


class TestSerDeInfo:
"""Tests for the SerDeInfo class."""

@staticmethod
def _get_target_class():
return schema.SerDeInfo

def _make_one(self, *args, **kwargs):
return self._get_target_class()(*args, **kwargs)

@pytest.mark.parametrize(
"serialization_library,name,parameters",
[
("testpath.to.LazySimpleSerDe", None, None),
("testpath.to.LazySimpleSerDe", "serde_name", None),
("testpath.to.LazySimpleSerDe", None, {"key": "value"}),
("testpath.to.LazySimpleSerDe", "serde_name", {"key": "value"}),
],
)
def test_ctor_valid_input(self, serialization_library, name, parameters):
serde_info = self._make_one(
serialization_library=serialization_library,
name=name,
parameters=parameters,
)
assert serde_info.serialization_library == serialization_library
assert serde_info.name == name
assert serde_info.parameters == parameters

@pytest.mark.parametrize(
"serialization_library,name,parameters",
[
(123, None, None),
("testpath.to.LazySimpleSerDe", 123, None),
("testpath.to.LazySimpleSerDe", None, ["test", "list"]),
("testpath.to.LazySimpleSerDe", None, 123),
],
)
def test_ctor_invalid_input(self, serialization_library, name, parameters):
with pytest.raises(TypeError) as e:
self._make_one(
serialization_library=serialization_library,
name=name,
parameters=parameters,
)
# Looking for the first word from the string "Pass <variable> as..."
assert "Pass " in str(e.value)

def test_to_api_repr(self):
serde_info = self._make_one(
serialization_library="testpath.to.LazySimpleSerDe",
name="serde_name",
parameters={"key": "value"},
)
expected_repr = {
"serializationLibrary": "testpath.to.LazySimpleSerDe",
"name": "serde_name",
"parameters": {"key": "value"},
}
assert serde_info.to_api_repr() == expected_repr

def test_from_api_repr(self):
"""GIVEN an api representation of a SerDeInfo object (i.e. resource)
WHEN converted into a SerDeInfo object using from_api_repr()
THEN it will have the representation in dict format as a SerDeInfo
object made directly (via _make_one()) and represented in dict format.
"""
api_repr = {
"serializationLibrary": "testpath.to.LazySimpleSerDe",
"name": "serde_name",
"parameters": {"key": "value"},
}

expected = self._make_one(
serialization_library="testpath.to.LazySimpleSerDe",
name="serde_name",
parameters={"key": "value"},
)

klass = self._get_target_class()
result = klass.from_api_repr(api_repr)

# We convert both to dict format because these classes do not have a
# __eq__() method to facilitate direct equality comparisons.
assert result.to_api_repr() == expected.to_api_repr()
Loading