Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: sanitize transformation error message to hide record values #221

Merged
merged 11 commits into from
Jan 16, 2025
38 changes: 35 additions & 3 deletions airbyte_cdk/sources/utils/transform.py
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@

from jsonschema import Draft7Validator, RefResolver, ValidationError, Validator, validators

# Maximum number of nested levels described by TypeTransformer._get_type_structure
# when building a sanitized error message; values at this depth are reported by
# their type name only instead of being expanded further.
MAX_NESTING_DEPTH = 3
json_to_python_simple = {
"string": str,
"number": float,
@@ -225,6 +226,37 @@ def transform(
logger.warning(self.get_error_message(e))

def get_error_message(self, e: ValidationError) -> str:
    """
    Construct a sanitized error message from a ValidationError instance.

    The message names only the *type structure* of the offending value, the
    type(s) the schema asked for and the path of the field. The value itself
    is never included, so record contents cannot leak into logs.

    :param e: validation error raised while normalizing a record; its ``path``,
        ``instance`` and ``validator_value`` attributes are read.
    :return: human readable, value-free description of the failed transformation.
    """
    field_path = ".".join(map(str, e.path))
    type_structure = self._get_type_structure(e.instance)

    # A dict result means the instance was an object whose fields were expanded;
    # label it explicitly so the message is not mistaken for a scalar type name.
    if isinstance(type_structure, dict):
        source_type = f"object with structure {type_structure}"
    else:
        source_type = type_structure

    return f"Failed to transform value from type '{source_type}' to type '{e.validator_value}' at path: '{field_path}'"

def _get_type_structure(self, input_data: Any, current_depth: int = 0) -> Any:
    """
    Get the structure of a given input data for use in error message construction.

    Only type names are reported, never the values themselves.

    :param input_data: the value to describe.
    :param current_depth: nesting level of ``input_data`` relative to the value
        the description was requested for; callers normally leave the default.
    :return: ``"null"`` for ``None``; for a dict, a dict with the same keys whose
        values are described recursively (or ``"object"`` once
        ``MAX_NESTING_DEPTH`` is reached); ``"array"`` for a list; otherwise the
        name registered in ``python_to_json``, falling back to the Python class
        name for types that have no registered name.
    """
    # Handle null values
    if input_data is None:
        return "null"

    if isinstance(input_data, dict):
        # Avoid recursing too deep: collapse the object to its type name
        if current_depth >= MAX_NESTING_DEPTH:
            return "object"
        return {
            key: self._get_type_structure(field_value, current_depth + 1)
            for key, field_value in input_data.items()
        }

    # Lists are never expanded; report them the same way at every depth
    # (including list subclasses, which have no python_to_json entry).
    if isinstance(input_data, list):
        return "array"

    value_type = type(input_data)
    try:
        return python_to_json[value_type]
    except KeyError:
        # Building a log message must not raise for an unmapped type; the class
        # name still reveals nothing about the value.
        return value_type.__name__
65 changes: 55 additions & 10 deletions unit_tests/sources/utils/test_transform.py
Original file line number Diff line number Diff line change
@@ -65,7 +65,6 @@
@pytest.mark.parametrize(
"schema, actual, expected, expected_warns",
[
(SIMPLE_SCHEMA, {"value": 12}, {"value": "12"}, None),
(SIMPLE_SCHEMA, {"value": 12}, {"value": "12"}, None),
(
SIMPLE_SCHEMA,
@@ -104,14 +103,14 @@
COMPLEX_SCHEMA,
{"prop": 12, "number_prop": "aa12", "array": [12]},
{"prop": "12", "number_prop": "aa12", "array": ["12"]},
"Failed to transform value 'aa12' of type 'string' to 'number', key path: '.number_prop'",
"Failed to transform value from type 'string' to type 'number' at path: 'number_prop'",
),
# Field too_many_types has an ambiguous type, skip formatting
(
COMPLEX_SCHEMA,
{"prop": 12, "too_many_types": 1212, "array": [12]},
{"prop": "12", "too_many_types": 1212, "array": ["12"]},
"Failed to transform value 1212 of type 'integer' to '['boolean', 'null', 'string']', key path: '.too_many_types'",
"Failed to transform value from type 'integer' to type '['boolean', 'null', 'string']' at path: 'too_many_types'",
),
# Test null field
(COMPLEX_SCHEMA, {"prop": None, "array": [12]}, {"prop": "None", "array": ["12"]}, None),
@@ -196,7 +195,7 @@
},
{"value": "string"},
{"value": "string"},
"Failed to transform value 'string' of type 'string' to 'array', key path: '.value'",
"Failed to transform value from type 'string' to type 'array' at path: 'value'",
),
(
{
@@ -205,21 +204,21 @@
},
{"value": {"key": "value"}},
{"value": {"key": "value"}},
"Failed to transform value {'key': 'value'} of type 'object' to 'array', key path: '.value'",
"Failed to transform value from type 'object with structure {'key': 'string'}' to type 'array' at path: 'value'",
),
(
# Schema root type is not an object, no conversion should happen
{"type": "integer"},
{"value": "12"},
{"value": "12"},
"Failed to transform value {'value': '12'} of type 'object' to 'integer', key path: '.'",
"Failed to transform value from type 'object with structure {'value': 'string'}' to type 'integer' at path: ''",
),
(
# More than one type except null, no conversion should happen
{"type": "object", "properties": {"value": {"type": ["string", "boolean", "null"]}}},
{"value": 12},
{"value": 12},
"Failed to transform value 12 of type 'integer' to '['string', 'boolean', 'null']', key path: '.value'",
"Failed to transform value from type 'integer' to type '['string', 'boolean', 'null']' at path: 'value'",
),
(
# oneOf is not supported, no conversion for one_of_value should happen
@@ -252,7 +251,7 @@
},
{"value": {"key": "value"}},
{"value": {"key": "value"}},
"Failed to transform value {'key': 'value'} of type 'object' to 'array', key path: '.value'",
"Failed to transform value from type 'object with structure {'key': 'string'}' to type 'array' at path: 'value'",
),
(
{
@@ -263,7 +262,7 @@
},
{"value1": "value2"},
{"value1": "value2"},
"Failed to transform value 'value2' of type 'string' to 'object', key path: '.value1'",
"Failed to transform value from type 'string' to type 'object' at path: 'value1'",
),
(
{
@@ -272,9 +271,55 @@
},
{"value": ["one", "two"]},
{"value": ["one", "two"]},
"Failed to transform value 'one' of type 'string' to 'object', key path: '.value.0'",
"Failed to transform value from type 'string' to type 'object' at path: 'value.0'",
),
(
{"type": "string"},
None,
None,
"Failed to transform value from type 'null' to type 'string' at path: ''",
),
(
{"type": "string"},
{"a": {"b": {"c": {"d": {"e": "deep value"}}}}},
{"a": {"b": {"c": {"d": {"e": "deep value"}}}}},
"Failed to transform value from type 'object with structure {'a': {'b': {'c': 'object'}}}' to type 'string' at path: ''",
),
],
ids=[
"simple_number_to_string",
"preserve_unexpected_fields",
"array_with_mixed_types",
"nested_list_conversion",
"array_in_nested_object",
"string_to_boolean_nested",
"empty_object",
"string_to_integer",
"skip_invalid_number_format",
"skip_ambiguous_types",
"null_to_string",
"preserve_null_when_allowed",
"very_nested_object_conversion",
"null_in_nested_structure",
"object_without_properties",
"array_without_items",
"non_array_to_array",
"number_to_array",
"null_to_array",
"null_preserved_for_nullable_array",
"number_to_string_array",
"string_fails_object_array",
"object_fails_array_with_string_array_items",
"non_object_root_schema",
"multiple_allowed_types",
"oneof_not_supported",
"facebook_cpc_number_conversion",
"object_fails_array_with_string_item",
"string_fails_object_conversion",
"string_fails_object_in_array",
"null_input_data",
"max_nesting_depth_protection",
],
)
def test_transform(schema, actual, expected, expected_warns, caplog):
t = TypeTransformer(TransformConfig.DefaultSchemaNormalization)
Loading