From e88e1a0b6be082bbc9193d04f7ecc91b470bda1f Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Tue, 14 Jan 2025 16:35:49 -0800 Subject: [PATCH 1/9] fix: remove record values from transformation error message --- airbyte_cdk/sources/utils/transform.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index d6885e8c3..4344d01a7 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -225,6 +225,16 @@ def transform( logger.warning(self.get_error_message(e)) def get_error_message(self, e: ValidationError) -> str: - instance_json_type = python_to_json[type(e.instance)] + + def _get_type_structure(input_data: Any) -> Any: + if isinstance(input_data, dict): + return {key: _get_type_structure(value) for key, value in input_data.items()} + elif isinstance(input_data, list): + return [_get_type_structure(value) for value in input_data] if input_data else "array" + else: + return type(input_data).__name__.lower() + key_path = "." + ".".join(map(str, e.path)) - return f"Failed to transform value {repr(e.instance)} of type '{instance_json_type}' to '{e.validator_value}', key path: '{key_path}'" + type_structure = _get_type_structure(e.instance) + + return f"Failed to transform value of {type_structure} to '{e.validator_value}'. Key path: '{key_path}'" From d1bb601f121caab597e30373460ad6ab57d0e6cd Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Tue, 14 Jan 2025 17:26:01 -0800 Subject: [PATCH 2/9] refactor: clarify message and update tests --- airbyte_cdk/sources/utils/transform.py | 19 ++++++++++--------- unit_tests/sources/utils/test_transform.py | 18 +++++++++--------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index 4344d01a7..592644fcc 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -225,16 +225,17 @@ def transform( logger.warning(self.get_error_message(e)) def get_error_message(self, e: ValidationError) -> str: - - def _get_type_structure(input_data: Any) -> Any: + def get_type_structure(input_data: Any) -> Any: if isinstance(input_data, dict): - return {key: _get_type_structure(value) for key, value in input_data.items()} + return { + key: get_type_structure(field_value) for key, field_value in input_data.items() + } elif isinstance(input_data, list): - return [_get_type_structure(value) for value in input_data] if input_data else "array" + return [get_type_structure(item) for item in input_data] if input_data else "array" else: - return type(input_data).__name__.lower() - - key_path = "." + ".".join(map(str, e.path)) - type_structure = _get_type_structure(e.instance) + return python_to_json[type(input_data)] + + field_path = ".".join(map(str, e.path)) + type_structure = get_type_structure(e.instance) - return f"Failed to transform value of {type_structure} to '{e.validator_value}'. Key path: '{key_path}'" + return f"Failed to transform value of type '{type_structure}' to '{e.validator_value}' at path: '{field_path}'" diff --git a/unit_tests/sources/utils/test_transform.py b/unit_tests/sources/utils/test_transform.py index cfa0ebd0c..9b6e2c69c 100644 --- a/unit_tests/sources/utils/test_transform.py +++ b/unit_tests/sources/utils/test_transform.py @@ -104,14 +104,14 @@ COMPLEX_SCHEMA, {"prop": 12, "number_prop": "aa12", "array": [12]}, {"prop": "12", "number_prop": "aa12", "array": ["12"]}, - "Failed to transform value 'aa12' of type 'string' to 'number', key path: '.number_prop'", + "Failed to transform value of type 'string' to 'number' at path: 'number_prop'", ), # Field too_many_types have ambigious type, skip formatting ( COMPLEX_SCHEMA, {"prop": 12, "too_many_types": 1212, "array": [12]}, {"prop": "12", "too_many_types": 1212, "array": ["12"]}, - "Failed to transform value 1212 of type 'integer' to '['boolean', 'null', 'string']', key path: '.too_many_types'", + "Failed to transform value of type 'integer' to '['boolean', 'null', 'string']' at path: 'too_many_types'", ), # Test null field (COMPLEX_SCHEMA, {"prop": None, "array": [12]}, {"prop": "None", "array": ["12"]}, None), @@ -196,7 +196,7 @@ }, {"value": "string"}, {"value": "string"}, - "Failed to transform value 'string' of type 'string' to 'array', key path: '.value'", + "Failed to transform value of type 'string' to 'array' at path: 'value'", ), ( { @@ -205,21 +205,21 @@ }, {"value": {"key": "value"}}, {"value": {"key": "value"}}, - "Failed to transform value {'key': 'value'} of type 'object' to 'array', key path: '.value'", + "Failed to transform value of type '{'key': 'string'}' to 'array' at path: 'value'", ), ( # Schema root object is not an object, no convertion should happen {"type": "integer"}, {"value": "12"}, {"value": "12"}, - "Failed to transform value {'value': '12'} of type 'object' to 'integer', key path: '.'", + "Failed to transform value of type '{'value': 'string'}' to 'integer' at path: ''", ), ( # More than one type except null, no conversion should happen {"type": "object", "properties": {"value": {"type": ["string", "boolean", "null"]}}}, {"value": 12}, {"value": 12}, - "Failed to transform value 12 of type 'integer' to '['string', 'boolean', 'null']', key path: '.value'", + "Failed to transform value of type 'integer' to '['string', 'boolean', 'null']' at path: 'value'", ), ( # Oneof not suported, no conversion for one_of_value should happen @@ -252,7 +252,7 @@ }, {"value": {"key": "value"}}, {"value": {"key": "value"}}, - "Failed to transform value {'key': 'value'} of type 'object' to 'array', key path: '.value'", + "Failed to transform value of type '{'key': 'string'}' to 'array' at path: 'value'", ), ( { @@ -263,7 +263,7 @@ }, {"value1": "value2"}, {"value1": "value2"}, - "Failed to transform value 'value2' of type 'string' to 'object', key path: '.value1'", + "Failed to transform value of type 'string' to 'object' at path: 'value1'", ), ( { @@ -272,7 +272,7 @@ }, {"value": ["one", "two"]}, {"value": ["one", "two"]}, - "Failed to transform value 'one' of type 'string' to 'object', key path: '.value.0'", + "Failed to transform value of type 'string' to 'object' at path: 'value.0'", ), ], ) From 7111c1a8420e128dbe757a7ffaf12d5c08d7f7c3 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Wed, 15 Jan 2025 09:29:25 -0800 Subject: [PATCH 3/9] refactor: clearer message for objects --- airbyte_cdk/sources/utils/transform.py | 19 +++++++++++-------- unit_tests/sources/utils/test_transform.py | 18 +++++++++--------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index 592644fcc..49c37545c 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -225,17 +225,20 @@ def transform( logger.warning(self.get_error_message(e)) def get_error_message(self, e: ValidationError) -> str: - def get_type_structure(input_data: Any) -> Any: + def _get_type_structure(input_data: Any) -> Any: if isinstance(input_data, dict): - return { - key: get_type_structure(field_value) for key, field_value in input_data.items() - } + structure = {key: _get_type_structure(field_value) for key, field_value in input_data.items()} + return f"object with structure {structure}" elif isinstance(input_data, list): - return [get_type_structure(item) for item in input_data] if input_data else "array" + if not input_data: + # Handle empty array + return "array" + items = [_get_type_structure(item) for item in input_data] + return f"array with structure {items}" else: return python_to_json[type(input_data)] field_path = ".".join(map(str, e.path)) - type_structure = get_type_structure(e.instance) - - return f"Failed to transform value of type '{type_structure}' to '{e.validator_value}' at path: '{field_path}'" + type_structure = _get_type_structure(e.instance) + + return f"Failed to transform value from type '{type_structure}' to type '{e.validator_value}' at path: '{field_path}'" diff --git a/unit_tests/sources/utils/test_transform.py b/unit_tests/sources/utils/test_transform.py index 9b6e2c69c..fa14d7f3e 100644 --- a/unit_tests/sources/utils/test_transform.py +++ b/unit_tests/sources/utils/test_transform.py @@ -104,14 +104,14 @@ COMPLEX_SCHEMA, {"prop": 12, "number_prop": "aa12", "array": [12]}, {"prop": "12", "number_prop": "aa12", "array": ["12"]}, - "Failed to transform value of type 'string' to 'number' at path: 'number_prop'", + "Failed to transform value from type 'string' to type 'number' at path: 'number_prop'", ), # Field too_many_types have ambigious type, skip formatting ( COMPLEX_SCHEMA, {"prop": 12, "too_many_types": 1212, "array": [12]}, {"prop": "12", "too_many_types": 1212, "array": ["12"]}, - "Failed to transform value of type 'integer' to '['boolean', 'null', 'string']' at path: 'too_many_types'", + "Failed to transform value from type 'integer' to type '['boolean', 'null', 'string']' at path: 'too_many_types'", ), # Test null field (COMPLEX_SCHEMA, {"prop": None, "array": [12]}, {"prop": "None", "array": ["12"]}, None), @@ -196,7 +196,7 @@ }, {"value": "string"}, {"value": "string"}, - "Failed to transform value of type 'string' to 'array' at path: 'value'", + "Failed to transform value from type 'string' to type 'array' at path: 'value'", ), ( { @@ -205,21 +205,21 @@ }, {"value": {"key": "value"}}, {"value": {"key": "value"}}, - "Failed to transform value of type '{'key': 'string'}' to 'array' at path: 'value'", + "Failed to transform value from type 'object with structure {'key': 'string'}' to type 'array' at path: 'value'", ), ( # Schema root object is not an object, no convertion should happen {"type": "integer"}, {"value": "12"}, {"value": "12"}, - "Failed to transform value of type '{'value': 'string'}' to 'integer' at path: ''", + "Failed to transform value from type 'object with structure {'value': 'string'}' to type 'integer' at path: ''", ), ( # More than one type except null, no conversion should happen {"type": "object", "properties": {"value": {"type": ["string", "boolean", "null"]}}}, {"value": 12}, {"value": 12}, - "Failed to transform value of type 'integer' to '['string', 'boolean', 'null']' at path: 'value'", + "Failed to transform value from type 'integer' to type '['string', 'boolean', 'null']' at path: 'value'", ), ( # Oneof not suported, no conversion for one_of_value should happen @@ -252,7 +252,7 @@ }, {"value": {"key": "value"}}, {"value": {"key": "value"}}, - "Failed to transform value of type '{'key': 'string'}' to 'array' at path: 'value'", + "Failed to transform value from type 'object with structure {'key': 'string'}' to type 'array' at path: 'value'", ), ( { @@ -263,7 +263,7 @@ }, {"value1": "value2"}, {"value1": "value2"}, - "Failed to transform value of type 'string' to 'object' at path: 'value1'", + "Failed to transform value from type 'string' to type 'object' at path: 'value1'", ), ( { @@ -272,7 +272,7 @@ }, {"value": ["one", "two"]}, {"value": ["one", "two"]}, - "Failed to transform value of type 'string' to 'object' at path: 'value.0'", + "Failed to transform value from type 'string' to type 'object' at path: 'value.0'", ), ], ) From 427423e86c6058607ed0c17042a282673d1af40c Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Wed, 15 Jan 2025 09:29:50 -0800 Subject: [PATCH 4/9] chore: format --- airbyte_cdk/sources/utils/transform.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index 49c37545c..18d97220a 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -227,7 +227,9 @@ def transform( def get_error_message(self, e: ValidationError) -> str: def _get_type_structure(input_data: Any) -> Any: if isinstance(input_data, dict): - structure = {key: _get_type_structure(field_value) for key, field_value in input_data.items()} + structure = { + key: _get_type_structure(field_value) for key, field_value in input_data.items() + } return f"object with structure {structure}" elif isinstance(input_data, list): if not input_data: @@ -240,5 +242,5 @@ def _get_type_structure(input_data: Any) -> Any: field_path = ".".join(map(str, e.path)) type_structure = _get_type_structure(e.instance) - + return f"Failed to transform value from type '{type_structure}' to type '{e.validator_value}' at path: '{field_path}'" From d982ffe208ff3502e1ee2cc27401f8a5a37fd9b9 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Wed, 15 Jan 2025 12:27:25 -0800 Subject: [PATCH 5/9] refactor: remove unneeded array handling --- airbyte_cdk/sources/utils/transform.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index 18d97220a..2c366f0f0 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -231,12 +231,6 @@ def _get_type_structure(input_data: Any) -> Any: key: _get_type_structure(field_value) for key, field_value in input_data.items() } return f"object with structure {structure}" - elif isinstance(input_data, list): - if not input_data: - # Handle empty array - return "array" - items = [_get_type_structure(item) for item in input_data] - return f"array with structure {items}" else: return python_to_json[type(input_data)] From a83c1a683772d1d8dac514ff2fc80519ff9cb69f Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Wed, 15 Jan 2025 14:05:37 -0800 Subject: [PATCH 6/9] reformat: address review --- airbyte_cdk/sources/utils/transform.py | 46 ++++++++++++++++----- unit_tests/sources/utils/test_transform.py | 47 +++++++++++++++++++++- 2 files changed, 82 insertions(+), 11 deletions(-) diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index 2c366f0f0..dfbdd25d2 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -9,6 +9,7 @@ from jsonschema import Draft7Validator, RefResolver, ValidationError, Validator, validators +MAX_NESTING_DEPTH = 3 json_to_python_simple = { "string": str, "number": float, @@ -225,16 +226,41 @@ def transform( logger.warning(self.get_error_message(e)) def get_error_message(self, e: ValidationError) -> str: - def _get_type_structure(input_data: Any) -> Any: - if isinstance(input_data, dict): - structure = { - key: _get_type_structure(field_value) for key, field_value in input_data.items() - } - return f"object with structure {structure}" - else: - return python_to_json[type(input_data)] - + """ + Construct a sanitized error message from a ValidationError instance. + """ field_path = ".".join(map(str, e.path)) - type_structure = _get_type_structure(e.instance) + type_structure = self._get_type_structure(e.instance) return f"Failed to transform value from type '{type_structure}' to type '{e.validator_value}' at path: '{field_path}'" + + def _get_type_structure(self, input_data: Any, current_depth: int = 0) -> Any: + """ + Get the structure of a given input data for use in error message construction. + """ + # Handle null values + if input_data is None: + return "null" + + # Avoid recursing too deep + if current_depth >= MAX_NESTING_DEPTH: + return ( + "object" + if isinstance(input_data, dict) + else "array" + if isinstance(input_data, list) + else python_to_json[type(input_data)] + ) + + if isinstance(input_data, dict): + structure = { + key: self._get_type_structure(field_value, current_depth + 1) + for key, field_value in input_data.items() + } + + if current_depth == 0: + return f"object with structure {structure}" + return structure + + else: + return python_to_json[type(input_data)] diff --git a/unit_tests/sources/utils/test_transform.py b/unit_tests/sources/utils/test_transform.py index fa14d7f3e..5e228948a 100644 --- a/unit_tests/sources/utils/test_transform.py +++ b/unit_tests/sources/utils/test_transform.py @@ -65,7 +65,6 @@ @pytest.mark.parametrize( "schema, actual, expected, expected_warns", [ - (SIMPLE_SCHEMA, {"value": 12}, {"value": "12"}, None), (SIMPLE_SCHEMA, {"value": 12}, {"value": "12"}, None), ( SIMPLE_SCHEMA, @@ -274,6 +273,52 @@ {"value": ["one", "two"]}, "Failed to transform value from type 'string' to type 'object' at path: 'value.0'", ), + ( + {"type": "string"}, + None, + None, + "Failed to transform value from type 'null' to type 'string' at path: ''", + ), + ( + {"type": "string"}, + {"a": {"b": {"c": {"d": {"e": "deep value"}}}}}, + {"a": {"b": {"c": {"d": {"e": "deep value"}}}}}, + "Failed to transform value from type 'object with structure {'a': {'b': {'c': 'object'}}}' to type 'string' at path: ''", + ), + ], + ids=[ + "simple_number_to_string", + "preserve_unexpected_fields", + "array_with_mixed_types", + "nested_list_conversion", + "array_in_nested_object", + "string_to_boolean_nested", + "empty_object", + "string_to_integer", + "skip_invalid_number_format", + "skip_ambiguous_types", + "null_to_string", + "preserve_null_when_allowed", + "very_nested_object_conversion", + "null_in_nested_structure", + "object_without_properties", + "array_without_items", + "non_array_to_array", + "number_to_array", + "null_to_array", + "null_preserved_for_nullable_array", + "number_to_string_array", + "string_fails_object_array", + "object_fails_array_with_string_array_items", + "non_object_root_schema", + "multiple_allowed_types", + "oneof_not_supported", + "facebook_cpc_number_conversion", + "object_fails_array_with_string_item", + "string_fails_object_conversion", + "string_fails_object_in_array", + "null_input_data", + "max_nesting_depth_protection", ], ) def test_transform(schema, actual, expected, expected_warns, caplog): From c97cf85f793a349dd0de8139c7f9ca452ad262fa Mon Sep 17 00:00:00 2001 From: Christo Grabowski <108154848+ChristoGrab@users.noreply.github.com> Date: Wed, 15 Jan 2025 17:20:55 -0500 Subject: [PATCH 7/9] Update airbyte_cdk/sources/utils/transform.py Co-authored-by: Ben Church --- airbyte_cdk/sources/utils/transform.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index dfbdd25d2..c995297a2 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -253,14 +253,10 @@ def _get_type_structure(self, input_data: Any, current_depth: int = 0) -> Any: ) if isinstance(input_data, dict): - structure = { + return { key: self._get_type_structure(field_value, current_depth + 1) for key, field_value in input_data.items() } - if current_depth == 0: - return f"object with structure {structure}" - return structure - else: return python_to_json[type(input_data)] From b6a2c619829dcf0e6f84004e07e8b4c0273188ec Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Wed, 15 Jan 2025 14:33:37 -0800 Subject: [PATCH 8/9] chore: update tests --- airbyte_cdk/sources/utils/transform.py | 2 -- unit_tests/sources/utils/test_transform.py | 8 ++++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index c995297a2..30ff2ceba 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -247,8 +247,6 @@ def _get_type_structure(self, input_data: Any, current_depth: int = 0) -> Any: return ( "object" if isinstance(input_data, dict) - else "array" - if isinstance(input_data, list) else python_to_json[type(input_data)] ) diff --git a/unit_tests/sources/utils/test_transform.py b/unit_tests/sources/utils/test_transform.py index 5e228948a..2965ce3a3 100644 --- a/unit_tests/sources/utils/test_transform.py +++ b/unit_tests/sources/utils/test_transform.py @@ -204,14 +204,14 @@ }, {"value": {"key": "value"}}, {"value": {"key": "value"}}, - "Failed to transform value from type 'object with structure {'key': 'string'}' to type 'array' at path: 'value'", + "Failed to transform value from type '{'key': 'string'}' to type 'array' at path: 'value'", ), ( # Schema root object is not an object, no convertion should happen {"type": "integer"}, {"value": "12"}, {"value": "12"}, - "Failed to transform value from type 'object with structure {'value': 'string'}' to type 'integer' at path: ''", + "Failed to transform value from type '{'value': 'string'}' to type 'integer' at path: ''", ), ( # More than one type except null, no conversion should happen @@ -251,7 +251,7 @@ }, {"value": {"key": "value"}}, {"value": {"key": "value"}}, - "Failed to transform value from type 'object with structure {'key': 'string'}' to type 'array' at path: 'value'", + "Failed to transform value from type '{'key': 'string'}' to type 'array' at path: 'value'", ), ( { @@ -283,7 +283,7 @@ {"type": "string"}, {"a": {"b": {"c": {"d": {"e": "deep value"}}}}}, {"a": {"b": {"c": {"d": {"e": "deep value"}}}}}, - "Failed to transform value from type 'object with structure {'a': {'b': {'c': 'object'}}}' to type 'string' at path: ''", + "Failed to transform value from type '{'a': {'b': {'c': 'object'}}}' to type 'string' at path: ''", ), ], ids=[ From 37383632491d9690ddbe598ae809d72ce764eca6 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Wed, 15 Jan 2025 14:54:39 -0800 Subject: [PATCH 9/9] chore: format --- airbyte_cdk/sources/utils/transform.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index 30ff2ceba..05c299560 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -244,11 +244,7 @@ def _get_type_structure(self, input_data: Any, current_depth: int = 0) -> Any: # Avoid recursing too deep if current_depth >= MAX_NESTING_DEPTH: - return ( - "object" - if isinstance(input_data, dict) - else python_to_json[type(input_data)] - ) + return "object" if isinstance(input_data, dict) else python_to_json[type(input_data)] if isinstance(input_data, dict): return {